Fix bug 2046705 where the output of the speech synthesizer is too low.

The language files for the SVOX Pico engine make the synthesizer output too soft, so it is barely audible on a phone speaker. This change applies a low-shelf filter to the synthesis output, followed by a drastic amplification. This works as intended because the synthesized data contains too much energy in the lower frequencies, which is wasted on a phone speaker. Once that energy is filtered out, there is room for amplification to address the volume issue.
author: Jean-Michel Trivi <jmtrivi@google.com> 2009-08-13 19:05:45 -0700
committer: Jean-Michel Trivi <jmtrivi@google.com> 2009-08-14 10:05:19 -0700
commit: 4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0 (patch)
tree: e5af26a429ca3ff7e65eef94d8b011604a906f75 /packages
parent: b8546001701405a76dad7e6235046e592296fac2 (diff)
download: frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.zip
frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.tar.gz
frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.tar.bz2
1 files changed, 93 insertions, 3 deletions
diff --git a/packages/TtsService/jni/android_tts_SynthProxy.cpp b/packages/TtsService/jni/android_tts_SynthProxy.cpp
index 99d7723..1bab717 100644
--- a/packages/TtsService/jni/android_tts_SynthProxy.cpp
+++ b/packages/TtsService/jni/android_tts_SynthProxy.cpp
@@ -26,16 +26,25 @@
 #include <android_runtime/AndroidRuntime.h>
 #include <tts/TtsEngine.h>
 #include <media/AudioTrack.h>
+#include <math.h>
 
 #include <dlfcn.h>
 
 #define DEFAULT_TTS_RATE        16000
 #define DEFAULT_TTS_FORMAT      AudioSystem::PCM_16_BIT
 #define DEFAULT_TTS_NB_CHANNELS 1
-#define DEFAULT_TTS_BUFFERSIZE  1024
+#define DEFAULT_TTS_BUFFERSIZE  2048
 // TODO use the TTS stream type when available
 #define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
 
+// EQ + BOOST parameters (consumed by initializeEQ())
+#define FILTER_LOWSHELF_ATTENUATION -18.0f // low-shelf gain in dB (negative = attenuation)
+#define FILTER_TRANSITION_FREQ 1100.0f     // shelf transition frequency in Hz
+#define FILTER_SHELF_SLOPE 1.0f            // Q; divides sqrt(amp) to form beta
+#define FILTER_GAIN 6.0f // linear gain applied to the feed-forward coefficients
+// Such a large gain is justified by how much energy in the low frequencies is "wasted" at the output
+// of the synthesis. The low shelving filter removes it, leaving room for amplification.
+
 #define USAGEMODE_PLAY_IMMEDIATELY 0
 #define USAGEMODE_WRITE_TO_FILE    1
 
@@ -57,6 +66,79 @@ struct afterSynthData_t {
 };
 
 // ----------------------------------------------------------------------------
+// EQ data: low-shelf filter coefficients and sample history, kept in file-scope globals
+double amp;   // amplitude factor: 10^(FILTER_LOWSHELF_ATTENUATION / 40), set in initializeEQ()
+double w;     // 2*pi * (FILTER_TRANSITION_FREQ / DEFAULT_TTS_RATE)
+double sinw;  // sin(w)
+double cosw;  // cos(w)
+double beta;  // sqrt(amp) / FILTER_SHELF_SLOPE
+double a0, a1, a2, b0, b1, b2; // un-normalized coefficients (b*: feed-forward, a*: feedback)
+double m_fa, m_fb, m_fc, m_fd, m_fe; // used by applyFilter(): FILTER_GAIN*b0..b2/a0, then a1/a0 and a2/a0
+double x0;  // x[n]   : current input sample
+double x1;  // x[n-1] : previous input sample
+double x2;  // x[n-2] : input sample before that
+double out0;// y[n]   : current output sample (before clamping)
+double out1;// y[n-1] : previous output sample
+double out2;// y[n-2] : output sample before that
+
+void initializeEQ() { // computes the low-shelf + gain coefficients m_fa..m_fe from the FILTER_* macros
+    // NOTE(review): coefficients are fixed for DEFAULT_TTS_RATE; the rate passed to the synth callback is not consulted
+    amp = float(pow(10.0, FILTER_LOWSHELF_ATTENUATION / 40.0)); // dB -> amplitude factor; float() rounds to float precision
+    w = 2.0 * M_PI * (FILTER_TRANSITION_FREQ / DEFAULT_TTS_RATE); // transition frequency expressed as an angle
+    sinw = float(sin(w)); // rounded to float precision before being stored in a double
+    cosw = float(cos(w)); // same rounding as sinw
+    beta = float(sqrt(amp)/FILTER_SHELF_SLOPE); // same rounding as sinw
+
+    // initialize low-shelf parameters
+    b0 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) + (beta*sinw));
+    b1 = 2.0F * amp * ((amp-1.0F) - ((amp+1.0F)*cosw));
+    b2 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) - (beta*sinw));
+    a0 = (amp+1.0F) + ((amp-1.0F)*cosw) + (beta*sinw); // normalization term: every coefficient below is divided by it
+    a1 = 2.0F * ((amp-1.0F) + ((amp+1.0F)*cosw));
+    a2 = -((amp+1.0F) + ((amp-1.0F)*cosw) - (beta*sinw)); // negated here; applyFilter() adds m_fe*out2
+
+    m_fa = FILTER_GAIN * b0/a0; // feed-forward terms: normalized by a0 and scaled by the linear gain
+    m_fb = FILTER_GAIN * b1/a0;
+    m_fc = FILTER_GAIN * b2/a0;
+    m_fd = a1/a0; // feedback terms: normalized by a0 only, no gain applied
+    m_fe = a2/a0;
+}
+
+void initializeFilter() { // zeroes the filter's sample history; the coefficients are left untouched
+    x0 = 0.0f; // input history x[n], x[n-1], x[n-2]
+    x1 = 0.0f;
+    x2 = 0.0f;
+    out0 = 0.0f; // output history y[n], y[n-1], y[n-2]
+    out1 = 0.0f;
+    out2 = 0.0f;
+}
+
+void applyFilter(int16_t* buffer, size_t sampleCount) { // filters sampleCount 16-bit samples of buffer in place
+    // the history (x1, x2, out1, out2) lives in globals, so it carries over from one call to the next
+    for (size_t i=0 ; i<sampleCount ; i++) {
+        // current input sample
+        x0 = (double) buffer[i];
+        // difference equation: feed-forward on x0..x2, feedback on out1 and out2
+        out0 = (m_fa*x0) + (m_fb*x1) + (m_fc*x2) + (m_fd*out1) + (m_fe*out2);
+        // shift the input history by one sample
+        x2 = x1;
+        x1 = x0;
+        // shift the output history; the unclamped out0 is what feeds back into later samples
+        out2 = out1;
+        out1 = out0;
+        // saturate to the int16_t range before writing back
+        if (out0 > 32767.0f) {
+            buffer[i] = 32767;
+        } else if (out0 < -32768.0f) {
+            buffer[i] = -32768;
+        } else {
+            buffer[i] = (int16_t) out0; // in-range values are truncated toward zero by the cast
+        }
+    }
+}
+
+
+// ----------------------------------------------------------------------------
 static fields_t javaTTSFields;
 
 // TODO move to synth member once we have multiple simultaneous engines running
@@ -198,12 +280,13 @@ static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
 
         if (wav == NULL) {
             delete pForAfter;
-            LOGI("Null: speech has completed");
+            LOGV("Null: speech has completed");
         }
 
         if (bufferSize > 0) {
             prepAudioTrack(pJniData, pForAfter->streamType, rate, format, channel);
             if (pJniData->mAudioOut) {
+                applyFilter((int16_t*)wav, bufferSize/2);
                 pJniData->mAudioOut->write(wav, bufferSize);
                 memset(wav, 0, bufferSize);
                 //LOGV("AudioTrack wrote: %d bytes", bufferSize);
@@ -212,13 +295,14 @@ static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
             }
         }
     } else  if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
-        LOGV("Save to file");
+        //LOGV("Save to file");
         if (wav == NULL) {
             delete pForAfter;
             LOGV("Null: speech has completed");
             return TTS_CALLBACK_HALT;
         }
         if (bufferSize > 0){
+            applyFilter((int16_t*)wav, bufferSize/2);
             fwrite(wav, 1, bufferSize, pForAfter->outputFile);
             memset(wav, 0, bufferSize);
         }
@@ -289,6 +373,8 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
     env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData,
             (int)pJniStorage);
 
+    initializeEQ();
+
     env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString);
 }
 
@@ -479,6 +565,8 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
         return result;
     }
 
+    initializeFilter();
+
     Mutex::Autolock l(engineMutex);
 
     // Retrieve audio parameters before writing the file header
@@ -583,6 +671,8 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
         return result;
     }
 
+    initializeFilter();
+
     Mutex::Autolock l(engineMutex);
 
     SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;
author	Jean-Michel Trivi <jmtrivi@google.com>	2009-08-13 19:05:45 -0700
committer	Jean-Michel Trivi <jmtrivi@google.com>	2009-08-14 10:05:19 -0700
commit	4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0 (patch)
tree	e5af26a429ca3ff7e65eef94d8b011604a906f75 /packages
parent	b8546001701405a76dad7e6235046e592296fac2 (diff)
download	frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.zip frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.tar.gz frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.tar.bz2