summaryrefslogtreecommitdiffstats
path: root/packages
diff options
context:
space:
mode:
authorJean-Michel Trivi <jmtrivi@google.com>2009-08-13 19:05:45 -0700
committerJean-Michel Trivi <jmtrivi@google.com>2009-08-14 10:05:19 -0700
commit4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0 (patch)
treee5af26a429ca3ff7e65eef94d8b011604a906f75 /packages
parentb8546001701405a76dad7e6235046e592296fac2 (diff)
downloadframeworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.zip
frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.tar.gz
frameworks_base-4fb7d882b27a71cc1d15ccaf130a0ccd201ae2e0.tar.bz2
Fix bug 2046705 where the output of the speech synthesizer is too low.
The language files for the SVOX Pico engine result in the output of the synthesizer to be too soft, and barely audible on a phone speaker. The change implements a low shelf filter on the output of the synthesis and a drastic amplification. This works as intended because the synthesized data contains too much energy in the lower frequencies that is wasted on a phone speaker. Once filtered out, they leave room for amplication to address the volume issue.
Diffstat (limited to 'packages')
-rw-r--r--packages/TtsService/jni/android_tts_SynthProxy.cpp96
1 files changed, 93 insertions, 3 deletions
diff --git a/packages/TtsService/jni/android_tts_SynthProxy.cpp b/packages/TtsService/jni/android_tts_SynthProxy.cpp
index 99d7723..1bab717 100644
--- a/packages/TtsService/jni/android_tts_SynthProxy.cpp
+++ b/packages/TtsService/jni/android_tts_SynthProxy.cpp
@@ -26,16 +26,25 @@
#include <android_runtime/AndroidRuntime.h>
#include <tts/TtsEngine.h>
#include <media/AudioTrack.h>
+#include <math.h>
#include <dlfcn.h>
#define DEFAULT_TTS_RATE 16000
#define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_NB_CHANNELS 1
-#define DEFAULT_TTS_BUFFERSIZE 1024
+#define DEFAULT_TTS_BUFFERSIZE 2048
// TODO use the TTS stream type when available
#define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC
+// EQ + BOOST parameters
+#define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB
+#define FILTER_TRANSITION_FREQ 1100.0f // in Hz
+#define FILTER_SHELF_SLOPE 1.0f // Q
+#define FILTER_GAIN 6.0f // linear gain
+// such a huge gain is justified by how much energy in the low frequencies is "wasted" at the output
+// of the synthesis. The low shelving filter removes it, leaving room for amplification.
+
#define USAGEMODE_PLAY_IMMEDIATELY 0
#define USAGEMODE_WRITE_TO_FILE 1
@@ -57,6 +66,79 @@ struct afterSynthData_t {
};
// ----------------------------------------------------------------------------
+// EQ data
+double amp;
+double w;
+double sinw;
+double cosw;
+double beta;
+double a0, a1, a2, b0, b1, b2;
+double m_fa, m_fb, m_fc, m_fd, m_fe;
+double x0; // x[n]
+double x1; // x[n-1]
+double x2; // x[n-2]
+double out0;// y[n]
+double out1;// y[n-1]
+double out2;// y[n-2]
+
+void initializeEQ() {
+
+ amp = float(pow(10.0, FILTER_LOWSHELF_ATTENUATION / 40.0));
+ w = 2.0 * M_PI * (FILTER_TRANSITION_FREQ / DEFAULT_TTS_RATE);
+ sinw = float(sin(w));
+ cosw = float(cos(w));
+ beta = float(sqrt(amp)/FILTER_SHELF_SLOPE);
+
+ // initialize low-shelf parameters
+ b0 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) + (beta*sinw));
+ b1 = 2.0F * amp * ((amp-1.0F) - ((amp+1.0F)*cosw));
+ b2 = amp * ((amp+1.0F) - ((amp-1.0F)*cosw) - (beta*sinw));
+ a0 = (amp+1.0F) + ((amp-1.0F)*cosw) + (beta*sinw);
+ a1 = 2.0F * ((amp-1.0F) + ((amp+1.0F)*cosw));
+ a2 = -((amp+1.0F) + ((amp-1.0F)*cosw) - (beta*sinw));
+
+ m_fa = FILTER_GAIN * b0/a0;
+ m_fb = FILTER_GAIN * b1/a0;
+ m_fc = FILTER_GAIN * b2/a0;
+ m_fd = a1/a0;
+ m_fe = a2/a0;
+}
+
+void initializeFilter() {
+ x0 = 0.0f;
+ x1 = 0.0f;
+ x2 = 0.0f;
+ out0 = 0.0f;
+ out1 = 0.0f;
+ out2 = 0.0f;
+}
+
+void applyFilter(int16_t* buffer, size_t sampleCount) {
+
+ for (size_t i=0 ; i<sampleCount ; i++) {
+
+ x0 = (double) buffer[i];
+
+ out0 = (m_fa*x0) + (m_fb*x1) + (m_fc*x2) + (m_fd*out1) + (m_fe*out2);
+
+ x2 = x1;
+ x1 = x0;
+
+ out2 = out1;
+ out1 = out0;
+
+ if (out0 > 32767.0f) {
+ buffer[i] = 32767;
+ } else if (out0 < -32768.0f) {
+ buffer[i] = -32768;
+ } else {
+ buffer[i] = (int16_t) out0;
+ }
+ }
+}
+
+
+// ----------------------------------------------------------------------------
static fields_t javaTTSFields;
// TODO move to synth member once we have multiple simultaneous engines running
@@ -198,12 +280,13 @@ static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
if (wav == NULL) {
delete pForAfter;
- LOGI("Null: speech has completed");
+ LOGV("Null: speech has completed");
}
if (bufferSize > 0) {
prepAudioTrack(pJniData, pForAfter->streamType, rate, format, channel);
if (pJniData->mAudioOut) {
+ applyFilter((int16_t*)wav, bufferSize/2);
pJniData->mAudioOut->write(wav, bufferSize);
memset(wav, 0, bufferSize);
//LOGV("AudioTrack wrote: %d bytes", bufferSize);
@@ -212,13 +295,14 @@ static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
}
}
} else if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
- LOGV("Save to file");
+ //LOGV("Save to file");
if (wav == NULL) {
delete pForAfter;
LOGV("Null: speech has completed");
return TTS_CALLBACK_HALT;
}
if (bufferSize > 0){
+ applyFilter((int16_t*)wav, bufferSize/2);
fwrite(wav, 1, bufferSize, pForAfter->outputFile);
memset(wav, 0, bufferSize);
}
@@ -289,6 +373,8 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz,
env->SetIntField(thiz, javaTTSFields.synthProxyFieldJniData,
(int)pJniStorage);
+ initializeEQ();
+
env->ReleaseStringUTFChars(nativeSoLib, nativeSoLibNativeString);
}
@@ -479,6 +565,8 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData,
return result;
}
+ initializeFilter();
+
Mutex::Autolock l(engineMutex);
// Retrieve audio parameters before writing the file header
@@ -583,6 +671,8 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData,
return result;
}
+ initializeFilter();
+
Mutex::Autolock l(engineMutex);
SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData;