Add support in TTS for volume and panning control of the synth output

Add two new parameters that are used when synthesizing text and playing it back directly to control the volume and left-right panning of the output. Panning is applied using a balance law, which is not energy-preserving but which doesn't lower the volume when not panning / panning to center (legacy behavior). Reduced amount of logs, and removed spoken text. In TextToSpeech.java: added convenience method to handle the setting of the cached synthesis parameters. Change-Id: I235d3d3193283ccc1891e2065d43787e3f63304d
author: Jean-Michel Trivi <jmtrivi@google.com> 2011-01-05 16:08:21 -0800
committer: Jean-Michel Trivi <jmtrivi@google.com> 2011-01-05 16:24:30 -0800
commit: 9d2d26af2e1111251f5a21213a071eb4fdc1224f (patch)
tree: 502090359174aef58fdc840b22f3f423f31c099b /packages
parent: 2cdee233125a6cc4b00a2962d5a50273d6bb5410 (diff)
download: frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.zip
frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.gz
frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.bz2
3 files changed, 63 insertions, 19 deletions
diff --git a/packages/TtsService/jni/android_tts_SynthProxy.cpp b/packages/TtsService/jni/android_tts_SynthProxy.cpp
index 8dc88db..27d1fc0 100644
--- a/packages/TtsService/jni/android_tts_SynthProxy.cpp
+++ b/packages/TtsService/jni/android_tts_SynthProxy.cpp
@@ -17,7 +17,7 @@
 #include <stdio.h>
 #include <unistd.h>
 
-#define LOG_TAG "SynthProxy"
+#define LOG_TAG "SynthProxyJNI"
 
 #include <utils/Log.h>
 #include <nativehelper/jni.h>
@@ -33,8 +33,8 @@
 #define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
 #define DEFAULT_TTS_NB_CHANNELS 1
 #define DEFAULT_TTS_BUFFERSIZE  2048
-// TODO use the TTS stream type when available
 #define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
+#define DEFAULT_VOLUME          1.0f
 
 // EQ + BOOST parameters
 #define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
@@ -154,7 +154,7 @@ static Mutex engineMutex;
 class SynthProxyJniStorage {
     public :
         jobject                   tts_ref;
-        android_tts_engine_t*       mEngine;
+        android_tts_engine_t*     mEngine;
         void*                     mEngineLibHandle;
         AudioTrack*               mAudioOut;
         int8_t                    mPlayState;
@@ -165,6 +165,7 @@ class SynthProxyJniStorage {
         int                       mNbChannels;
         int8_t *                  mBuffer;
         size_t                    mBufferSize;
+        float                     mVolume[2];
 
         SynthProxyJniStorage() {
             tts_ref = NULL;
@@ -179,6 +180,8 @@ class SynthProxyJniStorage {
             mBufferSize = DEFAULT_TTS_BUFFERSIZE;
             mBuffer = new int8_t[mBufferSize];
             memset(mBuffer, 0, mBufferSize);
+            mVolume[AudioTrack::LEFT] = DEFAULT_VOLUME;
+            mVolume[AudioTrack::RIGHT] = DEFAULT_VOLUME;
         }
 
         ~SynthProxyJniStorage() {
@@ -189,7 +192,7 @@ class SynthProxyJniStorage {
                 mEngine = NULL;
             }
             if (mEngineLibHandle) {
-                //LOGE("~SynthProxyJniStorage(): before close library");
+                //LOGV("~SynthProxyJniStorage(): before close library");
                 int res = dlclose(mEngineLibHandle);
                 LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res);
             }
@@ -241,7 +244,7 @@ class SynthProxyJniStorage {
               mAudioOut = NULL;
             } else {
               //LOGI("AudioTrack OK");
-              mAudioOut->setVolume(1.0f, 1.0f);
+              mAudioOut->setVolume(mVolume[AudioTrack::LEFT], mVolume[AudioTrack::RIGHT]);
               LOGV("AudioTrack ready");
             }
             mPlayLock.unlock();
@@ -277,7 +280,7 @@ extern "C" android_tts_callback_status_t
 __ttsSynthDoneCB(void ** pUserdata, uint32_t rate,
                android_tts_audio_format_t format, int channel,
                int8_t **pWav, size_t *pBufferSize,
-               android_tts_synth_status_t status) 
+               android_tts_synth_status_t status)
 {
     //LOGV("ttsSynthDoneCallback: %d bytes", bufferSize);
     AudioSystem::audio_format  encoding;
@@ -618,7 +621,7 @@ android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData,
     Mutex::Autolock l(engineMutex);
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
-    LOGI("setting speech rate to %d", speechRate);
+    //LOGI("setting speech rate to %d", speechRate);
     android_tts_engine_t *engine = pSynthData->mEngine;
 
     if (engine) {
@@ -647,7 +650,7 @@ android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData,
     sprintf(buffer, "%d", pitch);
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
-    LOGI("setting pitch to %d", pitch);
+    //LOGI("setting pitch to %d", pitch);
     android_tts_engine_t *engine = pSynthData->mEngine;
 
     if (engine) {
@@ -783,7 +786,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
 
 static int
 android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
-        jstring textJavaString, jint javaStreamType)
+        jstring textJavaString, jint javaStreamType, jfloat volume, jfloat pan)
 {
     int result = ANDROID_TTS_FAILURE;
 
@@ -798,9 +801,34 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
 
-    pSynthData->mPlayLock.lock();
-    pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
-    pSynthData->mPlayLock.unlock();
+    {//scope for lock on mPlayLock
+        Mutex::Autolock _l(pSynthData->mPlayLock);
+
+        pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING;
+
+        // clip volume and pan
+        float vol = (volume > 1.0f) ? 1.0f : (volume < 0.0f) ? 0.0f : volume;
+        float panning = (pan > 1.0f) ? 1.0f : (pan < -1.0f) ? -1.0f : pan;
+        // compute playback volume based on volume and pan, using balance rule, in order to avoid
+        // lowering volume when panning in center
+        pSynthData->mVolume[AudioTrack::LEFT] = vol;
+        pSynthData->mVolume[AudioTrack::RIGHT] = vol;
+        if (panning > 0.0f) {
+            pSynthData->mVolume[AudioTrack::LEFT] *= (1.0f - panning);
+        } else if (panning < 0.0f) {
+            pSynthData->mVolume[AudioTrack::RIGHT] *= (1.0f + panning);
+        }
+
+        // apply the volume if there is an output
+        if (NULL != pSynthData->mAudioOut) {
+            pSynthData->mAudioOut->setVolume(pSynthData->mVolume[AudioTrack::LEFT],
+                    pSynthData->mVolume[AudioTrack::RIGHT]);
+        }
+
+        //LOGV("android_tts_SynthProxy_speak() vol=%.3f pan=%.3f, mVolume=[%.1f %.1f]",
+        //        volume, pan,
+        //        pSynthData->mVolume[AudioTrack::LEFT], pSynthData->mVolume[AudioTrack::RIGHT]);
+    }
 
     afterSynthData_t* pForAfter = new (afterSynthData_t);
     pForAfter->jniStorage = jniData;
@@ -935,7 +963,7 @@ static JNINativeMethod gMethods[] = {
         (void*)android_tts_SynthProxy_stopSync
     },
     {   "native_speak",
-        "(ILjava/lang/String;I)I",
+        "(ILjava/lang/String;IFF)I",
         (void*)android_tts_SynthProxy_speak
     },
     {   "native_synthesizeToFile",
diff --git a/packages/TtsService/src/android/tts/SynthProxy.java b/packages/TtsService/src/android/tts/SynthProxy.java
index 525a504..f5f5fcf 100755
--- a/packages/TtsService/src/android/tts/SynthProxy.java
+++ b/packages/TtsService/src/android/tts/SynthProxy.java
@@ -78,12 +78,13 @@ public class SynthProxy {
     /**
      * Synthesize speech and speak it directly using AudioTrack.
      */
-    public int speak(String text, int streamType) {
+    public int speak(String text, int streamType, float volume, float pan) {
+        Log.i(TAG, "speak() on stream "+ streamType);
         if ((streamType > -1) && (streamType < AudioSystem.getNumStreamTypes())) {
-            return native_speak(mJniData, text, streamType);
+            return native_speak(mJniData, text, streamType, volume, pan);
         } else {
             Log.e("SynthProxy", "Trying to speak with invalid stream type " + streamType);
-            return native_speak(mJniData, text, AudioManager.STREAM_MUSIC);
+            return native_speak(mJniData, text, AudioManager.STREAM_MUSIC, volume, pan);
         }
     }
 
@@ -93,6 +94,7 @@ public class SynthProxy {
      * "/sdcard/???.wav" is recommended.
      */
     public int synthesizeToFile(String text, String filename) {
+        Log.i(TAG, "synthesizeToFile() to file "+ filename);
         return native_synthesizeToFile(mJniData, text, filename);
     }
 
@@ -192,7 +194,8 @@ public class SynthProxy {
 
     private native final int native_stopSync(int jniData);
 
-    private native final int native_speak(int jniData, String text, int streamType);
+    private native final int native_speak(int jniData, String text, int streamType, float volume,
+            float pan);
 
     private native final int native_synthesizeToFile(int jniData, String text, String filename);
 
diff --git a/packages/TtsService/src/android/tts/TtsService.java b/packages/TtsService/src/android/tts/TtsService.java
index 08bbfb2..c562327 100755
--- a/packages/TtsService/src/android/tts/TtsService.java
+++ b/packages/TtsService/src/android/tts/TtsService.java
@@ -121,7 +121,6 @@ public class TtsService extends Service implements OnCompletionListener {
     private static final int SPEECHQUEUELOCK_TIMEOUT = 5000;
     private static final int MAX_SPEECH_ITEM_CHAR_LENGTH = 4000;
     private static final int MAX_FILENAME_LENGTH = 250;
-    // TODO use the TTS stream type when available
     private static final int DEFAULT_STREAM_TYPE = AudioManager.STREAM_MUSIC;
     // TODO use TextToSpeech.DEFAULT_SYNTH once it is unhidden
     private static final String DEFAULT_SYNTH = "com.svox.pico";
@@ -791,6 +790,8 @@ public class TtsService extends Service implements OnCompletionListener {
                     String speechRate = "";
                     String engine = "";
                     String pitch = "";
+                    float volume = TextToSpeech.Engine.DEFAULT_VOLUME;
+                    float pan = TextToSpeech.Engine.DEFAULT_PAN;
                     if (speechItem.mParams != null){
                         for (int i = 0; i < speechItem.mParams.size() - 1; i = i + 2){
                             String param = speechItem.mParams.get(i);
@@ -816,6 +817,18 @@ public class TtsService extends Service implements OnCompletionListener {
                                     engine = speechItem.mParams.get(i + 1);
                                 } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PITCH)) {
                                     pitch = speechItem.mParams.get(i + 1);
+                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_VOLUME)) {
+                                    try {
+                                        volume = Float.parseFloat(speechItem.mParams.get(i + 1));
+                                    } catch (NumberFormatException e) {
+                                        volume = TextToSpeech.Engine.DEFAULT_VOLUME;
+                                    }
+                                } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PAN)) {
+                                    try {
+                                        pan = Float.parseFloat(speechItem.mParams.get(i + 1));
+                                    } catch (NumberFormatException e) {
+                                        pan = TextToSpeech.Engine.DEFAULT_PAN;
+                                    }
                                 }
                             }
                         }
@@ -844,7 +857,7 @@ public class TtsService extends Service implements OnCompletionListener {
                             setPitch("", getDefaultPitch());
                         }
                         try {
-                            sNativeSynth.speak(speechItem.mText, streamType);
+                            sNativeSynth.speak(speechItem.mText, streamType, volume, pan);
                         } catch (NullPointerException e) {
                             // synth will become null during onDestroy()
                             Log.v(SERVICE_TAG, " null synth, can't speak");
author	Jean-Michel Trivi <jmtrivi@google.com>	2011-01-05 16:08:21 -0800
committer	Jean-Michel Trivi <jmtrivi@google.com>	2011-01-05 16:24:30 -0800
commit	9d2d26af2e1111251f5a21213a071eb4fdc1224f (patch)
tree	502090359174aef58fdc840b22f3f423f31c099b /packages
parent	2cdee233125a6cc4b00a2962d5a50273d6bb5410 (diff)
download	frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.zip frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.gz frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.bz2