Add support in TTS for volume and panning control of the synth output

Add two new parameters, used when synthesizing text and playing it back directly, to control the volume and left-right panning of the output. Panning is applied using a balance law, which is not energy-preserving but does not lower the volume when not panning / panning to center (legacy behavior). Reduced the amount of logging, and removed the spoken text from the log output. In TextToSpeech.java: added a convenience method to handle the setting of the cached synthesis parameters. Change-Id: I235d3d3193283ccc1891e2065d43787e3f63304d
author: Jean-Michel Trivi <jmtrivi@google.com> 2011-01-05 16:08:21 -0800
committer: Jean-Michel Trivi <jmtrivi@google.com> 2011-01-05 16:24:30 -0800
commit: 9d2d26af2e1111251f5a21213a071eb4fdc1224f (patch)
tree: 502090359174aef58fdc840b22f3f423f31c099b
parent: 2cdee233125a6cc4b00a2962d5a50273d6bb5410 (diff)
download: frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.zip
frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.gz
frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.bz2
4 files changed, 144 insertions, 49 deletions
diff --git a/core/java/android/speech/tts/TextToSpeech.java b/core/java/android/speech/tts/TextToSpeech.java
index 841257f..f010076 100755
--- a/core/java/android/speech/tts/TextToSpeech.java
+++ b/core/java/android/speech/tts/TextToSpeech.java
@@ -151,6 +151,23 @@ public class TextToSpeech {
         /**
          * {@hide}
          */
+        public static final float DEFAULT_VOLUME = 1.0f;
+        /**
+         * {@hide}
+         */
+        protected static final String DEFAULT_VOLUME_STRING = "1.0";
+        /**
+         * {@hide}
+         */
+        public static final float DEFAULT_PAN = 0.0f;
+        /**
+         * {@hide}
+         */
+        protected static final String DEFAULT_PAN_STRING = "0.0";
+
+        /**
+         * {@hide}
+         */
         public static final int USE_DEFAULTS = 0; // false
         /**
          * {@hide}
@@ -331,6 +348,24 @@ public class TextToSpeech {
          * @see TextToSpeech#synthesizeToFile(String, HashMap, String)
          */
         public static final String KEY_PARAM_UTTERANCE_ID = "utteranceId";
+        /**
+         * {@hide}
+         * Parameter key to specify the speech volume relative to the current stream type
+         * volume used when speaking text. Volume is specified as a float ranging from 0 to 1
+         * where 0 is silence, and 1 is the maximum volume.
+         * @see TextToSpeech#speak(String, int, HashMap)
+         * @see TextToSpeech#playEarcon(String, int, HashMap)
+         */
+        public static final String KEY_PARAM_VOLUME = "volume";
+        /**
+         * {@hide}
+         * Parameter key to specify how the speech is panned from left to right when speaking text.
+         * Pan is specified as a float ranging from -1 to +1 where -1 maps to a hard-left pan,
+         * 0 to center, and +1 to hard-right.
+         * @see TextToSpeech#speak(String, int, HashMap)
+         * @see TextToSpeech#playEarcon(String, int, HashMap)
+         */
+        public static final String KEY_PARAM_PAN = "pan";
 
         // key positions in the array of cached parameters
         /**
@@ -371,7 +406,18 @@ public class TextToSpeech {
         /**
          * {@hide}
          */
-        protected static final int NB_CACHED_PARAMS = 8;
+        protected static final int PARAM_POSITION_VOLUME = 16;
+
+        /**
+         * {@hide}
+         */
+        protected static final int PARAM_POSITION_PAN = 18;
+
+
+        /**
+         * {@hide}
+         */
+        protected static final int NB_CACHED_PARAMS = 20;
     }
 
     /**
@@ -416,6 +462,8 @@ public class TextToSpeech {
         mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID] = Engine.KEY_PARAM_UTTERANCE_ID;
         mCachedParams[Engine.PARAM_POSITION_ENGINE] = Engine.KEY_PARAM_ENGINE;
         mCachedParams[Engine.PARAM_POSITION_PITCH] = Engine.KEY_PARAM_PITCH;
+        mCachedParams[Engine.PARAM_POSITION_VOLUME] = Engine.KEY_PARAM_VOLUME;
+        mCachedParams[Engine.PARAM_POSITION_PAN] = Engine.KEY_PARAM_PAN;
 
         // Leave all defaults that are shown in Settings uninitialized/at the default
         // so that the values set in Settings will take effect if the application does
@@ -429,6 +477,8 @@ public class TextToSpeech {
         mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = "";
         mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = "";
         mCachedParams[Engine.PARAM_POSITION_PITCH + 1] = "100";
+        mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING;
+        mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING;
 
         initTts();
     }
@@ -717,24 +767,18 @@ public class TextToSpeech {
     {
         synchronized (mStartLock) {
             int result = ERROR;
-            Log.i("TTS received: ", text);
+            Log.i("TTS", "speak() queueMode=" + queueMode);
             if (!mStarted) {
                 return result;
             }
             try {
                 if ((params != null) && (!params.isEmpty())) {
-                    String extra = params.get(Engine.KEY_PARAM_STREAM);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra;
-                    }
-                    extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
-                    }
-                    extra = params.get(Engine.KEY_PARAM_ENGINE);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra;
-                    }
+                    setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM);
+                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+                            Engine.PARAM_POSITION_UTTERANCE_ID);
+                    setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE);
+                    setCachedParam(params, Engine.KEY_PARAM_VOLUME, Engine.PARAM_POSITION_VOLUME);
+                    setCachedParam(params, Engine.KEY_PARAM_PAN, Engine.PARAM_POSITION_PAN);
                 }
                 result = mITts.speak(mPackageName, text, queueMode, mCachedParams);
             } catch (RemoteException e) {
@@ -791,10 +835,9 @@ public class TextToSpeech {
                     if (extra != null) {
                         mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra;
                     }
-                    extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
-                    }
+                    setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM);
+                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+                            Engine.PARAM_POSITION_UTTERANCE_ID);
                 }
                 result = mITts.playEarcon(mPackageName, earcon, queueMode, null);
             } catch (RemoteException e) {
@@ -845,10 +888,8 @@ public class TextToSpeech {
             }
             try {
                 if ((params != null) && (!params.isEmpty())) {
-                    String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
-                    }
+                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+                            Engine.PARAM_POSITION_UTTERANCE_ID);
                 }
                 result = mITts.playSilence(mPackageName, durationInMs, queueMode, mCachedParams);
             } catch (RemoteException e) {
@@ -870,6 +911,7 @@ public class TextToSpeech {
                 mStarted = false;
                 initTts();
             } finally {
+                resetCachedParams();
                 return result;
             }
         }
@@ -1224,6 +1266,7 @@ public class TextToSpeech {
      */
     public int synthesizeToFile(String text, HashMap<String,String> params,
             String filename) {
+        Log.i("TTS", "synthesizeToFile()");
         synchronized (mStartLock) {
             int result = ERROR;
             if (!mStarted) {
@@ -1232,14 +1275,9 @@ public class TextToSpeech {
             try {
                 if ((params != null) && (!params.isEmpty())) {
                     // no need to read the stream type here
-                    String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra;
-                    }
-                    extra = params.get(Engine.KEY_PARAM_ENGINE);
-                    if (extra != null) {
-                        mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra;
-                    }
+                    setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID,
+                            Engine.PARAM_POSITION_UTTERANCE_ID);
+                    setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE);
                 }
                 result = mITts.synthesizeToFile(mPackageName, text, mCachedParams, filename) ?
                         SUCCESS : ERROR;
@@ -1277,6 +1315,19 @@ public class TextToSpeech {
         mCachedParams[Engine.PARAM_POSITION_STREAM + 1] =
                 String.valueOf(Engine.DEFAULT_STREAM);
         mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID+ 1] = "";
+        mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING;
+        mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING;
+    }
+
+    /**
+     * Convenience method to save a parameter in the cached parameter array, at the given index,
+     * for a property saved in the given hashmap.
+     */
+    private void setCachedParam(HashMap<String,String> params, String key, int keyIndex) {
+        String extra = params.get(key);
+        if (extra != null) {
+            mCachedParams[keyIndex+1] = extra;
+        }
     }
 
     /**
diff --git a/packages/TtsService/jni/android_tts_SynthProxy.cpp b/packages/TtsService/jni/android_tts_SynthProxy.cpp
index 8dc88db..27d1fc0 100644
--- a/packages/TtsService/jni/android_tts_SynthProxy.cpp
+++ b/packages/TtsService/jni/android_tts_SynthProxy.cpp
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <unistd.h>
 
-#define LOG_TAG "SynthProxy"
+#define LOG_TAG "SynthProxyJNI"
 
 #include <utils/Log.h>
 #include <nativehelper/jni.h>
@@ -33,8 +33,8 @@
 #define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
 #define DEFAULT_TTS_NB_CHANNELS 1
 #define DEFAULT_TTS_BUFFERSIZE  2048
-// TODO use the TTS stream type when available
 #define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
+#define DEFAULT_VOLUME          1.0f
 
 // EQ + BOOST parameters
 #define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
@@ -154,7 +154,7 @@ static Mutex engineMutex;
 class SynthProxyJniStorage {
     public :
         jobject                   tts_ref;
-        android_tts_engine_t*       mEngine;
+        android_tts_engine_t*     mEngine;
         void*                     mEngineLibHandle;
         AudioTrack*               mAudioOut;
         int8_t                    mPlayState;
@@ -165,6 +165,7 @@ class SynthProxyJniStorage {
         int                       mNbChannels;
         int8_t *                  mBuffer;
         size_t                    mBufferSize;
+        float                     mVolume[2];
 
         SynthProxyJniStorage() {
             tts_ref = NULL;
@@ -179,6 +180,8 @@ class SynthProxyJniStorage {
             mBufferSize = DEFAULT_TTS_BUFFERSIZE;
             mBuffer = new int8_t[mBufferSize];
             memset(mBuffer, 0, mBufferSize);
+            mVolume[AudioTrack::LEFT] = DEFAULT_VOLUME;
+            mVolume[AudioTrack::RIGHT] = DEFAULT_VOLUME;
         }
 
         ~SynthProxyJniStorage() {
@@ -189,7 +192,7 @@ class SynthProxyJniStorage {
                 mEngine = NULL;
             }
             if (mEngineLibHandle) {
-                //LOGE("~SynthProxyJniStorage(): before close library");
+                //LOGV("~SynthProxyJniStorage(): before close library");
                 int res = dlclose(mEngineLibHandle);
                 LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res);
             }
@@ -241,7 +244,7 @@ class SynthProxyJniStorage {
               mAudioOut = NULL;
             } else {
               //LOGI("AudioTrack OK");
-              mAudioOut->setVolume(1.0f, 1.0f);
+              mAudioOut->setVolume(mVolume[AudioTrack::LEFT], mVolume[AudioTrack::RIGHT]);
               LOGV("AudioTrack ready");
             }
             mPlayLock.unlock();
@@ -277,7 +280,7 @@ extern "C" android_tts_callback_status_t
 __ttsSynthDoneCB(void ** pUserdata, uint32_t rate,
                android_tts_audio_format_t format, int channel,
                int8_t **pWav, size_t *pBufferSize,
-               android_tts_synth_status_t status) 
+               android_tts_synth_status_t status)
 {
     //LOGV("ttsSynthDoneCallback: %d bytes", bufferSize);
     AudioSystem::audio_format  encoding;
@@ -618,7 +621,7 @@ android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
     Mutex::Autolock l(engineMutex);
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
-    LOGI("setting speech rate to %d", speechRate);
+    //LOGI("setting speech rate to %d", speechRate);
     android_tts_engine_t *engine = pSynthData->mEngine;
 
     if (engine) {
@@ -647,7 +650,7 @@ android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData,
     sprintf(buffer, "%d", pitch);
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
-    LOGI("setting pitch to %d", pitch);
+    //LOGI("setting pitch to %d", pitch);
     android_tts_engine_t *engine = pSynthData->mEngine;
 
     if (engine) {
@@ -783,7 +786,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
 
 static int
 android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
-        jstring textJavaString, jint javaStreamType)
+        jstring textJavaString, jint javaStreamType, jfloat volume, jfloat pan)
 {
     int result = ANDROID_TTS_FAILURE;
 
@@ -798,9 +801,34 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
 
-    pSynthData->mPlayLock.lock();
-    pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
-    pSynthData->mPlayLock.unlock();
+    {//scope for lock on mPlayLock
+        Mutex::Autolock _l(pSynthData->mPlayLock);
+
+        pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
+
+        // clip volume and pan
+        float vol = (volume > 1.0f) ? 1.0f : (volume < 0.0f) ? 0.0f : volume;
+        float panning = (pan > 1.0f) ? 1.0f : (pan < -1.0f) ? -1.0f : pan;
+        // compute playback volume based on volume and pan, using balance rule, in order to avoid
+        // lowering volume when panning in center
+        pSynthData->mVolume[AudioTrack::LEFT] = vol;
+        pSynthData->mVolume[AudioTrack::RIGHT] = vol;
+        if (panning > 0.0f) {
+            pSynthData->mVolume[AudioTrack::LEFT] *= (1.0f - panning);
+        } else if (panning < 0.0f) {
+            pSynthData->mVolume[AudioTrack::RIGHT] *= (1.0f + panning);
+        }
+
+        // apply the volume if there is an output
+        if (NULL != pSynthData->mAudioOut) {
+            pSynthData->mAudioOut->setVolume(pSynthData->mVolume[AudioTrack::LEFT],
+                    pSynthData->mVolume[AudioTrack::RIGHT]);
+        }
+
+        //LOGV("android_tts_SynthProxy_speak() vol=%.3f pan=%.3f, mVolume=[%.1f %.1f]",
+        //        volume, pan,
+        //        pSynthData->mVolume[AudioTrack::LEFT], pSynthData->mVolume[AudioTrack::RIGHT]);
+    }
 
     afterSynthData_t* pForAfter = new (afterSynthData_t);
     pForAfter->jniStorage = jniData;
@@ -935,7 +963,7 @@ static JNINativeMethod gMethods[] = {
         (void*)android_tts_SynthProxy_stopSync
     },
     {   "native_speak",
-        "(ILjava/lang/String;I)I",
+        "(ILjava/lang/String;IFF)I",
         (void*)android_tts_SynthProxy_speak
     },
     {   "native_synthesizeToFile",
diff --git a/packages/TtsService/src/android/tts/SynthProxy.java b/packages/TtsService/src/android/tts/SynthProxy.java
index 525a504..f5f5fcf 100755
--- a/packages/TtsService/src/android/tts/SynthProxy.java
+++ b/packages/TtsService/src/android/tts/SynthProxy.java
@@ -78,12 +78,13 @@ public class SynthProxy {
     /**
      * Synthesize speech and speak it directly using AudioTrack.
      */
-    public int speak(String text, int streamType) {
+    public int speak(String text, int streamType, float volume, float pan) {
+        Log.i(TAG, "speak() on stream "+ streamType);
         if ((streamType > -1) && (streamType < AudioSystem.getNumStreamTypes())) {
-            return native_speak(mJniData, text, streamType);
+            return native_speak(mJniData, text, streamType, volume, pan);
         } else {
             Log.e("SynthProxy", "Trying to speak with invalid stream type " + streamType);
-            return native_speak(mJniData, text, AudioManager.STREAM_MUSIC);
+            return native_speak(mJniData, text, AudioManager.STREAM_MUSIC, volume, pan);
         }
     }
 
@@ -93,6 +94,7 @@ public class SynthProxy {
      * "/sdcard/???.wav" is recommended.
      */
     public int synthesizeToFile(String text, String filename) {
+        Log.i(TAG, "synthesizeToFile() to file "+ filename);
         return native_synthesizeToFile(mJniData, text, filename);
     }
 
@@ -192,7 +194,8 @@ public class SynthProxy {
 
     private native final int native_stopSync(int jniData);
 
-    private native final int native_speak(int jniData, String text, int streamType);
+    private native final int native_speak(int jniData, String text, int streamType, float volume,
+            float pan);
 
     private native final int native_synthesizeToFile(int jniData, String text, String filename);
 
diff --git a/packages/TtsService/src/android/tts/TtsService.java b/packages/TtsService/src/android/tts/TtsService.java
index 08bbfb2..c562327 100755
--- a/packages/TtsService/src/android/tts/TtsService.java
+++ b/packages/TtsService/src/android/tts/TtsService.java
@@ -121,7 +121,6 @@ public class TtsService extends Service implements OnCompletionListener {
     private static final int SPEECHQUEUELOCK_TIMEOUT = 5000;
     private static final int MAX_SPEECH_ITEM_CHAR_LENGTH = 4000;
     private static final int MAX_FILENAME_LENGTH = 250;
-    // TODO use the TTS stream type when available
     private static final int DEFAULT_STREAM_TYPE = AudioManager.STREAM_MUSIC;
     // TODO use TextToSpeech.DEFAULT_SYNTH once it is unhidden
     private static final String DEFAULT_SYNTH = "com.svox.pico";
@@ -791,6 +790,8 @@ public class TtsService extends Service implements OnCompletionListener {
                     String speechRate = "";
                     String engine = "";
                     String pitch = "";
+                    float volume = TextToSpeech.Engine.DEFAULT_VOLUME;
+                    float pan = TextToSpeech.Engine.DEFAULT_PAN;
                     if (speechItem.mParams != null){
                         for (int i = 0; i < speechItem.mParams.size() - 1; i = i + 2){
                             String param = speechItem.mParams.get(i);
@@ -816,6 +817,18 @@ public class TtsService extends Service implements OnCompletionListener {
                                     engine = speechItem.mParams.get(i + 1);
                                 } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PITCH)) {
                                     pitch = speechItem.mParams.get(i + 1);
+                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_VOLUME)) {
+                                    try {
+                                        volume = Float.parseFloat(speechItem.mParams.get(i + 1));
+                                    } catch (NumberFormatException e) {
+                                        volume = TextToSpeech.Engine.DEFAULT_VOLUME;
+                                    }
+                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PAN)) {
+                                    try {
+                                        pan = Float.parseFloat(speechItem.mParams.get(i + 1));
+                                    } catch (NumberFormatException e) {
+                                        pan = TextToSpeech.Engine.DEFAULT_PAN;
+                                    }
                                 }
                             }
                         }
@@ -844,7 +857,7 @@ public class TtsService extends Service implements OnCompletionListener {
                             setPitch("", getDefaultPitch());
                         }
                         try {
-                            sNativeSynth.speak(speechItem.mText, streamType);
+                            sNativeSynth.speak(speechItem.mText, streamType, volume, pan);
                         } catch (NullPointerException e) {
                             // synth will become null during onDestroy()
                             Log.v(SERVICE_TAG, " null synth, can't speak");
author	Jean-Michel Trivi <jmtrivi@google.com>	2011-01-05 16:08:21 -0800
committer	Jean-Michel Trivi <jmtrivi@google.com>	2011-01-05 16:24:30 -0800
commit	9d2d26af2e1111251f5a21213a071eb4fdc1224f (patch)
tree	502090359174aef58fdc840b22f3f423f31c099b
parent	2cdee233125a6cc4b00a2962d5a50273d6bb5410 (diff)
download	frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.zip frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.gz frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.bz2