-rw-r--r--  cmds/screenrecord/screenrecord.cpp | 153
-rw-r--r--  cmds/screenrecord/screenrecord.h | 2
-rw-r--r--  include/media/AudioBufferProvider.h | 11
-rw-r--r--  include/media/AudioRecord.h | 3
-rw-r--r--  include/media/AudioSystem.h | 28
-rw-r--r--  include/media/AudioTrack.h | 13
-rw-r--r--  include/media/stagefright/MetaData.h | 1
-rwxr-xr-x  libvideoeditor/lvpp/VideoEditorPreviewController.cpp | 4
-rwxr-xr-x  libvideoeditor/osal/inc/M4OSA_Error.h | 2
-rwxr-xr-x  libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp | 8
-rwxr-xr-x  libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp | 2
-rw-r--r--  media/libeffects/loudness/EffectLoudnessEnhancer.cpp | 14
-rw-r--r--  media/libeffects/proxy/EffectProxy.cpp | 14
-rw-r--r--  media/libmedia/AudioRecord.cpp | 21
-rw-r--r--  media/libmedia/AudioSystem.cpp | 38
-rw-r--r--  media/libmedia/AudioTrack.cpp | 76
-rw-r--r--  media/libmedia/AudioTrackShared.cpp | 4
-rw-r--r--  media/libmedia/IMediaDeathNotifier.cpp | 2
-rw-r--r--  media/libmedia/SoundPool.cpp | 4
-rw-r--r--  media/libmedia/mediametadataretriever.cpp | 2
-rw-r--r--  media/libmedia/mediaplayer.cpp | 2
-rw-r--r--  media/libnbaio/MonoPipe.cpp | 2
-rw-r--r--  media/libstagefright/MPEG4Extractor.cpp | 198
-rw-r--r--  media/libstagefright/Utils.cpp | 11
-rw-r--r--  media/libstagefright/codecs/aacdec/SoftAAC2.cpp | 7
-rw-r--r--  media/libstagefright/httplive/M3UParser.cpp | 2
-rw-r--r--  media/libstagefright/httplive/PlaylistFetcher.cpp | 9
-rw-r--r--  media/libstagefright/mpeg2ts/AnotherPacketSource.cpp | 48
-rw-r--r--  services/audioflinger/Android.mk | 10
-rw-r--r--  services/audioflinger/AudioPolicyService.cpp | 8
-rw-r--r--  services/audioflinger/AudioResampler.cpp | 27
-rw-r--r--  services/audioflinger/AudioResampler.h | 3
-rw-r--r--  services/audioflinger/AudioResamplerDyn.cpp | 551
-rw-r--r--  services/audioflinger/AudioResamplerDyn.h | 124
-rw-r--r--  services/audioflinger/AudioResamplerFirGen.h | 684
-rw-r--r--  services/audioflinger/AudioResamplerFirOps.h | 163
-rw-r--r--  services/audioflinger/AudioResamplerFirProcess.h | 256
-rw-r--r--  services/audioflinger/AudioResamplerFirProcessNeon.h | 1149
-rw-r--r--  services/audioflinger/Threads.cpp | 11
-rw-r--r--  services/audioflinger/Tracks.cpp | 2
-rw-r--r--  services/audioflinger/test-resample.cpp | 167
-rw-r--r--  services/camera/libcameraservice/api2/CameraDeviceClient.cpp | 64
-rw-r--r--  tools/resampler_tools/fir.cpp | 84
43 files changed, 3734 insertions, 250 deletions
diff --git a/cmds/screenrecord/screenrecord.cpp b/cmds/screenrecord/screenrecord.cpp
index 61f83e3..b6f150c 100644
--- a/cmds/screenrecord/screenrecord.cpp
+++ b/cmds/screenrecord/screenrecord.cpp
@@ -45,6 +45,7 @@
#include <signal.h>
#include <getopt.h>
#include <sys/wait.h>
+#include <termios.h>
#include <assert.h>
#include "screenrecord.h"
@@ -61,6 +62,7 @@ static const uint32_t kFallbackHeight = 720;
// Command-line parameters.
static bool gVerbose = false; // chatty on stdout
static bool gRotate = false; // rotate 90 degrees
+static bool gRawOutput = false; // generate raw H.264 byte stream output
static bool gSizeSpecified = false; // was size explicitly requested?
static bool gWantInfoScreen = false; // do we want initial info screen?
static bool gWantFrameTime = false; // do we want times on each frame?
@@ -298,10 +300,12 @@ static status_t prepareVirtualDisplay(const DisplayInfo& mainDpyInfo,
* input frames are coming from the virtual display as fast as SurfaceFlinger
* wants to send them.
*
+ * Exactly one of muxer or rawFp must be non-null.
+ *
* The muxer must *not* have been started before calling.
*/
static status_t runEncoder(const sp<MediaCodec>& encoder,
- const sp<MediaMuxer>& muxer, const sp<IBinder>& mainDpy,
+ const sp<MediaMuxer>& muxer, FILE* rawFp, const sp<IBinder>& mainDpy,
const sp<IBinder>& virtualDpy, uint8_t orientation) {
static int kTimeout = 250000; // be responsive on signal
status_t err;
@@ -311,6 +315,8 @@ static status_t runEncoder(const sp<MediaCodec>& encoder,
int64_t endWhenNsec = startWhenNsec + seconds_to_nanoseconds(gTimeLimitSec);
DisplayInfo mainDpyInfo;
+ assert((rawFp == NULL && muxer != NULL) || (rawFp != NULL && muxer == NULL));
+
Vector<sp<ABuffer> > buffers;
err = encoder->getOutputBuffers(&buffers);
if (err != NO_ERROR) {
@@ -342,15 +348,16 @@ static status_t runEncoder(const sp<MediaCodec>& encoder,
case NO_ERROR:
// got a buffer
if ((flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) != 0) {
- // ignore this -- we passed the CSD into MediaMuxer when
- // we got the format change notification
- ALOGV("Got codec config buffer (%u bytes); ignoring", size);
- size = 0;
+ ALOGV("Got codec config buffer (%u bytes)", size);
+ if (muxer != NULL) {
+ // ignore this -- we passed the CSD into MediaMuxer when
+ // we got the format change notification
+ size = 0;
+ }
}
if (size != 0) {
ALOGV("Got data in buffer %d, size=%d, pts=%lld",
bufIndex, size, ptsUsec);
- assert(trackIdx != -1);
{ // scope
ATRACE_NAME("orientation");
@@ -379,14 +386,23 @@ static status_t runEncoder(const sp<MediaCodec>& encoder,
ptsUsec = systemTime(SYSTEM_TIME_MONOTONIC) / 1000;
}
- // The MediaMuxer docs are unclear, but it appears that we
- // need to pass either the full set of BufferInfo flags, or
- // (flags & BUFFER_FLAG_SYNCFRAME).
- //
- // If this blocks for too long we could drop frames. We may
- // want to queue these up and do them on a different thread.
- { // scope
+ if (muxer == NULL) {
+ fwrite(buffers[bufIndex]->data(), 1, size, rawFp);
+ // Flush the data immediately in case we're streaming.
+ // We don't want to do this if all we've written is
+ // the SPS/PPS data because mplayer gets confused.
+ if ((flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) == 0) {
+ fflush(rawFp);
+ }
+ } else {
+ // The MediaMuxer docs are unclear, but it appears that we
+ // need to pass either the full set of BufferInfo flags, or
+ // (flags & BUFFER_FLAG_SYNCFRAME).
+ //
+ // If this blocks for too long we could drop frames. We may
+ // want to queue these up and do them on a different thread.
ATRACE_NAME("write sample");
+ assert(trackIdx != -1);
err = muxer->writeSampleData(buffers[bufIndex], trackIdx,
ptsUsec, flags);
if (err != NO_ERROR) {
@@ -418,12 +434,14 @@ static status_t runEncoder(const sp<MediaCodec>& encoder,
ALOGV("Encoder format changed");
sp<AMessage> newFormat;
encoder->getOutputFormat(&newFormat);
- trackIdx = muxer->addTrack(newFormat);
- ALOGV("Starting muxer");
- err = muxer->start();
- if (err != NO_ERROR) {
- fprintf(stderr, "Unable to start muxer (err=%d)\n", err);
- return err;
+ if (muxer != NULL) {
+ trackIdx = muxer->addTrack(newFormat);
+ ALOGV("Starting muxer");
+ err = muxer->start();
+ if (err != NO_ERROR) {
+ fprintf(stderr, "Unable to start muxer (err=%d)\n", err);
+ return err;
+ }
}
}
break;
@@ -457,6 +475,44 @@ static status_t runEncoder(const sp<MediaCodec>& encoder,
}
/*
+ * Raw H.264 byte stream output requested. Send the output to stdout
+ * if desired. If the output is a tty, reconfigure it to avoid the
+ * CRLF line termination that we see with "adb shell" commands.
+ */
+static FILE* prepareRawOutput(const char* fileName) {
+ FILE* rawFp = NULL;
+
+ if (strcmp(fileName, "-") == 0) {
+ if (gVerbose) {
+ fprintf(stderr, "ERROR: verbose output and '-' not compatible");
+ return NULL;
+ }
+ rawFp = stdout;
+ } else {
+ rawFp = fopen(fileName, "w");
+ if (rawFp == NULL) {
+ fprintf(stderr, "fopen raw failed: %s\n", strerror(errno));
+ return NULL;
+ }
+ }
+
+ int fd = fileno(rawFp);
+ if (isatty(fd)) {
+ // best effort -- reconfigure tty for "raw"
+ ALOGD("raw video output to tty (fd=%d)", fd);
+ struct termios term;
+ if (tcgetattr(fd, &term) == 0) {
+ cfmakeraw(&term);
+ if (tcsetattr(fd, TCSANOW, &term) == 0) {
+ ALOGD("tty successfully configured for raw");
+ }
+ }
+ }
+
+ return rawFp;
+}
+
+/*
* Main "do work" method.
*
* Configures codec, muxer, and virtual display, then starts moving bits
@@ -558,16 +614,26 @@ static status_t recordScreen(const char* fileName) {
return err;
}
- // Configure muxer. We have to wait for the CSD blob from the encoder
- // before we can start it.
- sp<MediaMuxer> muxer = new MediaMuxer(fileName,
- MediaMuxer::OUTPUT_FORMAT_MPEG_4);
- if (gRotate) {
- muxer->setOrientationHint(90); // TODO: does this do anything?
+ sp<MediaMuxer> muxer = NULL;
+ FILE* rawFp = NULL;
+ if (gRawOutput) {
+ rawFp = prepareRawOutput(fileName);
+ if (rawFp == NULL) {
+ encoder->release();
+ return -1;
+ }
+ } else {
+ // Configure muxer. We have to wait for the CSD blob from the encoder
+ // before we can start it.
+ muxer = new MediaMuxer(fileName, MediaMuxer::OUTPUT_FORMAT_MPEG_4);
+ if (gRotate) {
+ muxer->setOrientationHint(90); // TODO: does this do anything?
+ }
}
// Main encoder loop.
- err = runEncoder(encoder, muxer, mainDpy, dpy, mainDpyInfo.orientation);
+ err = runEncoder(encoder, muxer, rawFp, mainDpy, dpy,
+ mainDpyInfo.orientation);
if (err != NO_ERROR) {
fprintf(stderr, "Encoder failed (err=%d)\n", err);
// fall through to cleanup
@@ -584,9 +650,13 @@ static status_t recordScreen(const char* fileName) {
overlay->stop();
}
encoder->stop();
- // If we don't stop muxer explicitly, i.e. let the destructor run,
- // it may hang (b/11050628).
- muxer->stop();
+ if (muxer != NULL) {
+ // If we don't stop muxer explicitly, i.e. let the destructor run,
+ // it may hang (b/11050628).
+ muxer->stop();
+ } else if (rawFp != stdout) {
+ fclose(rawFp);
+ }
encoder->release();
return err;
@@ -753,6 +823,7 @@ int main(int argc, char* const argv[]) {
{ "show-frame-time", no_argument, NULL, 'f' },
{ "bugreport", no_argument, NULL, 'u' },
{ "rotate", no_argument, NULL, 'r' },
+ { "raw", no_argument, NULL, 'w' },
{ NULL, 0, NULL, 0 }
};
@@ -818,6 +889,10 @@ int main(int argc, char* const argv[]) {
// experimental feature
gRotate = true;
break;
+ case 'w':
+ // experimental feature
+ gRawOutput = true;
+ break;
default:
if (ic != '?') {
fprintf(stderr, "getopt_long returned unexpected value 0x%x\n", ic);
@@ -831,17 +906,19 @@ int main(int argc, char* const argv[]) {
return 2;
}
- // MediaMuxer tries to create the file in the constructor, but we don't
- // learn about the failure until muxer.start(), which returns a generic
- // error code without logging anything. We attempt to create the file
- // now for better diagnostics.
const char* fileName = argv[optind];
- int fd = open(fileName, O_CREAT | O_RDWR, 0644);
- if (fd < 0) {
- fprintf(stderr, "Unable to open '%s': %s\n", fileName, strerror(errno));
- return 1;
+ if (!gRawOutput) {
+ // MediaMuxer tries to create the file in the constructor, but we don't
+ // learn about the failure until muxer.start(), which returns a generic
+ // error code without logging anything. We attempt to create the file
+ // now for better diagnostics.
+ int fd = open(fileName, O_CREAT | O_RDWR, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "Unable to open '%s': %s\n", fileName, strerror(errno));
+ return 1;
+ }
+ close(fd);
}
- close(fd);
status_t err = recordScreen(fileName);
if (err == NO_ERROR) {
diff --git a/cmds/screenrecord/screenrecord.h b/cmds/screenrecord/screenrecord.h
index 95e8a68..9b058c2 100644
--- a/cmds/screenrecord/screenrecord.h
+++ b/cmds/screenrecord/screenrecord.h
@@ -18,6 +18,6 @@
#define SCREENRECORD_SCREENRECORD_H
#define kVersionMajor 1
-#define kVersionMinor 1
+#define kVersionMinor 2
#endif /*SCREENRECORD_SCREENRECORD_H*/
diff --git a/include/media/AudioBufferProvider.h b/include/media/AudioBufferProvider.h
index ef392f0..7be449c 100644
--- a/include/media/AudioBufferProvider.h
+++ b/include/media/AudioBufferProvider.h
@@ -61,6 +61,17 @@ public:
// buffer->frameCount 0
virtual status_t getNextBuffer(Buffer* buffer, int64_t pts = kInvalidPTS) = 0;
+ // Release (a portion of) the buffer previously obtained by getNextBuffer().
+ // It is permissible to call releaseBuffer() multiple times per getNextBuffer().
+ // On entry:
+ // buffer->frameCount number of frames to release, must be <= number of frames
+ // obtained but not yet released
+ // buffer->raw unused
+ // On return:
+ // buffer->frameCount 0; implementation MUST set to zero
+ // buffer->raw undefined; implementation is PERMITTED to set to any value,
+ // so if caller needs to continue using this buffer it must
+ // keep track of the pointer itself
virtual void releaseBuffer(Buffer* buffer) = 0;
};
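
A minimal consumer sketch (not part of this change) may help illustrate the contract documented above: the caller keeps its own copy of buffer->raw, since releaseBuffer() is allowed to clobber it, and it may hand back the frames it obtained in several smaller releases. The drain(), consume() and frameSize names below are hypothetical.

    #include <media/AudioBufferProvider.h>

    // Hypothetical sink for the samples; stands in for a mixer or file writer.
    extern void consume(const uint8_t* data, size_t bytes);

    void drain(android::AudioBufferProvider* provider, size_t frameSize) {
        android::AudioBufferProvider::Buffer buf;
        buf.frameCount = 256;                                        // frames we would like
        if (provider->getNextBuffer(&buf) != android::OK || buf.frameCount == 0) {
            return;
        }
        const uint8_t* data = static_cast<const uint8_t*>(buf.raw);  // keep our own pointer
        size_t remaining = buf.frameCount;                           // frames actually obtained
        while (remaining != 0) {
            size_t chunk = remaining < 64 ? remaining : 64;
            consume(data, chunk * frameSize);
            data += chunk * frameSize;
            buf.frameCount = chunk;            // <= frames obtained but not yet released
            provider->releaseBuffer(&buf);     // buf.frameCount is 0, buf.raw undefined afterwards
            remaining -= chunk;
        }
    }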
diff --git a/include/media/AudioRecord.h b/include/media/AudioRecord.h
index b192bd3..0439cb0 100644
--- a/include/media/AudioRecord.h
+++ b/include/media/AudioRecord.h
@@ -412,6 +412,7 @@ private:
bool mPaused; // whether thread is requested to pause at next loop entry
bool mPausedInt; // whether thread internally requests pause
nsecs_t mPausedNs; // if mPausedInt then associated timeout, otherwise ignored
+ bool mIgnoreNextPausedInt; // whether to ignore next mPausedInt request
};
// body of AudioRecordThread::threadLoop()
@@ -422,7 +423,7 @@ private:
// NS_INACTIVE inactive so don't run again until re-started
// NS_NEVER never again
static const nsecs_t NS_WHENEVER = -1, NS_INACTIVE = -2, NS_NEVER = -3;
- nsecs_t processAudioBuffer(const sp<AudioRecordThread>& thread);
+ nsecs_t processAudioBuffer();
// caller must hold lock on mLock for all _l methods
status_t openRecord_l(size_t epoch);
diff --git a/include/media/AudioSystem.h b/include/media/AudioSystem.h
index b96b8a1..ca9aaf7 100644
--- a/include/media/AudioSystem.h
+++ b/include/media/AudioSystem.h
@@ -67,20 +67,24 @@ public:
// returns true in *state if tracks are active on the specified stream or have been active
// in the past inPastMs milliseconds
- static status_t isStreamActive(audio_stream_type_t stream, bool *state, uint32_t inPastMs = 0);
+ static status_t isStreamActive(audio_stream_type_t stream, bool *state, uint32_t inPastMs);
// returns true in *state if tracks are active for what qualifies as remote playback
// on the specified stream or have been active in the past inPastMs milliseconds. Remote
// playback isn't mutually exclusive with local playback.
static status_t isStreamActiveRemotely(audio_stream_type_t stream, bool *state,
- uint32_t inPastMs = 0);
+ uint32_t inPastMs);
// returns true in *state if a recorder is currently recording with the specified source
static status_t isSourceActive(audio_source_t source, bool *state);
// set/get audio hardware parameters. The function accepts a list of parameters
// key value pairs in the form: key1=value1;key2=value2;...
// Some keys are reserved for standard parameters (See AudioParameter class).
+ // The versions with audio_io_handle_t are intended for internal media framework use only.
static status_t setParameters(audio_io_handle_t ioHandle, const String8& keyValuePairs);
static String8 getParameters(audio_io_handle_t ioHandle, const String8& keys);
+ // The versions without audio_io_handle_t are intended for JNI.
+ static status_t setParameters(const String8& keyValuePairs);
+ static String8 getParameters(const String8& keys);
static void setErrorCallback(audio_error_callback cb);
@@ -90,12 +94,14 @@ public:
static float linearToLog(int volume);
static int logToLinear(float volume);
+ // Returned samplingRate and frameCount output values are guaranteed
+ // to be non-zero if status == NO_ERROR
static status_t getOutputSamplingRate(uint32_t* samplingRate,
- audio_stream_type_t stream = AUDIO_STREAM_DEFAULT);
+ audio_stream_type_t stream);
static status_t getOutputFrameCount(size_t* frameCount,
- audio_stream_type_t stream = AUDIO_STREAM_DEFAULT);
+ audio_stream_type_t stream);
static status_t getOutputLatency(uint32_t* latency,
- audio_stream_type_t stream = AUDIO_STREAM_DEFAULT);
+ audio_stream_type_t stream);
static status_t getSamplingRate(audio_io_handle_t output,
audio_stream_type_t streamType,
uint32_t* samplingRate);
@@ -202,16 +208,16 @@ public:
const audio_offload_info_t *offloadInfo = NULL);
static status_t startOutput(audio_io_handle_t output,
audio_stream_type_t stream,
- int session = 0);
+ int session);
static status_t stopOutput(audio_io_handle_t output,
audio_stream_type_t stream,
- int session = 0);
+ int session);
static void releaseOutput(audio_io_handle_t output);
static audio_io_handle_t getInput(audio_source_t inputSource,
- uint32_t samplingRate = 0,
- audio_format_t format = AUDIO_FORMAT_DEFAULT,
- audio_channel_mask_t channelMask = AUDIO_CHANNEL_IN_MONO,
- int sessionId = 0);
+ uint32_t samplingRate,
+ audio_format_t format,
+ audio_channel_mask_t channelMask,
+ int sessionId);
static status_t startInput(audio_io_handle_t input);
static status_t stopInput(audio_io_handle_t input);
static void releaseInput(audio_io_handle_t input);
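
The new overloads without an audio_io_handle_t simply forward with a zero handle (see the AudioSystem.cpp hunk further down). A minimal usage sketch; the key names are placeholders, not real AudioParameter keys.

    #include <media/AudioSystem.h>
    #include <utils/String8.h>

    void exampleGlobalParameters() {
        using android::AudioSystem;
        using android::String8;
        // keyValuePairs use the "key1=value1;key2=value2" form described above
        android::status_t status =
                AudioSystem::setParameters(String8("example_key=1;other_key=off"));
        String8 value = AudioSystem::getParameters(String8("example_key"));
        (void) status;
        (void) value;
    }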
diff --git a/include/media/AudioTrack.h b/include/media/AudioTrack.h
index 2717549..96135a6 100644
--- a/include/media/AudioTrack.h
+++ b/include/media/AudioTrack.h
@@ -123,6 +123,8 @@ public:
* - NO_ERROR: successful operation
* - NO_INIT: audio server or audio hardware not initialized
* - BAD_VALUE: unsupported configuration
+ * frameCount is guaranteed to be non-zero if status is NO_ERROR,
+ * and is undefined otherwise.
*/
static status_t getMinFrameCount(size_t* frameCount,
@@ -566,7 +568,7 @@ public:
uint32_t getUnderrunFrames() const;
/* Get the flags */
- audio_output_flags_t getFlags() const { return mFlags; }
+ audio_output_flags_t getFlags() const { AutoMutex _l(mLock); return mFlags; }
/* Set parameters - only possible when using direct output */
status_t setParameters(const String8& keyValuePairs);
@@ -626,9 +628,9 @@ protected:
// NS_INACTIVE inactive so don't run again until re-started
// NS_NEVER never again
static const nsecs_t NS_WHENEVER = -1, NS_INACTIVE = -2, NS_NEVER = -3;
- nsecs_t processAudioBuffer(const sp<AudioTrackThread>& thread);
- status_t processStreamEnd(int32_t waitCount);
+ nsecs_t processAudioBuffer();
+ bool isOffloaded() const;
// caller must hold lock on mLock for all _l methods
@@ -650,7 +652,7 @@ protected:
// FIXME enum is faster than strcmp() for parameter 'from'
status_t restoreTrack_l(const char *from);
- bool isOffloaded() const
+ bool isOffloaded_l() const
{ return (mFlags & AUDIO_OUTPUT_FLAG_COMPRESS_OFFLOAD) != 0; }
// Next 3 fields may be changed if IAudioTrack is re-created, but always != 0
@@ -720,6 +722,9 @@ protected:
uint32_t mUpdatePeriod; // in frames, zero means no EVENT_NEW_POS
audio_output_flags_t mFlags;
+ // const after set(), except for bits AUDIO_OUTPUT_FLAG_FAST and AUDIO_OUTPUT_FLAG_OFFLOAD.
+ // mLock must be held to read or write those bits reliably.
+
int mSessionId;
int mAuxEffectId;
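
The isOffloaded()/isOffloaded_l() split introduced above follows the class convention that _l methods require the caller to already hold mLock, while the public wrapper takes the lock itself. A generic sketch of that pattern (the class and flag names here are illustrative, not AudioTrack's):

    #include <utils/Mutex.h>

    class Example {
    public:
        Example() : mFlags(0) { }
        bool isOffloaded() const {
            android::AutoMutex lock(mLock);    // public accessor takes the lock...
            return isOffloaded_l();
        }
    protected:
        // ...while callers that already hold mLock use the _l variant directly.
        bool isOffloaded_l() const { return (mFlags & kOffloadFlag) != 0; }
        static const uint32_t kOffloadFlag = 1u << 4;
        mutable android::Mutex mLock;
        uint32_t mFlags;
    };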
diff --git a/include/media/stagefright/MetaData.h b/include/media/stagefright/MetaData.h
index de3fc36..3a87474 100644
--- a/include/media/stagefright/MetaData.h
+++ b/include/media/stagefright/MetaData.h
@@ -134,6 +134,7 @@ enum {
kKeyRequiresSecureBuffers = 'secu', // bool (int32_t)
kKeyIsADTS = 'adts', // bool (int32_t)
+ kKeyAACAOT = 'aaot', // int32_t
// If a MediaBuffer's data represents (at least partially) encrypted
// data, the following fields aid in decryption.
diff --git a/libvideoeditor/lvpp/VideoEditorPreviewController.cpp b/libvideoeditor/lvpp/VideoEditorPreviewController.cpp
index 149c4ea..c3cd3d0 100755
--- a/libvideoeditor/lvpp/VideoEditorPreviewController.cpp
+++ b/libvideoeditor/lvpp/VideoEditorPreviewController.cpp
@@ -1248,7 +1248,7 @@ void VideoEditorPreviewController::notify(
case MEDIA_SET_VIDEO_SIZE:
ALOGV("MEDIA_SET_VIDEO_SIZE; New video size %d x %d", ext1, ext2);
break;
- case 0xAAAAAAAA:
+ case static_cast<int>(0xAAAAAAAA):
ALOGV("VIDEO PLAYBACK ALMOST over, prepare next player");
// Select next player and prepare it
// If there is a clip after this one
@@ -1268,7 +1268,7 @@ void VideoEditorPreviewController::notify(
}
}
break;
- case 0xBBBBBBBB:
+ case static_cast<int>(0xBBBBBBBB):
{
ALOGV("VIDEO PLAYBACK, Update Overlay");
int overlayIndex = ext2;
diff --git a/libvideoeditor/osal/inc/M4OSA_Error.h b/libvideoeditor/osal/inc/M4OSA_Error.h
index 4d59529..75c3177 100755
--- a/libvideoeditor/osal/inc/M4OSA_Error.h
+++ b/libvideoeditor/osal/inc/M4OSA_Error.h
@@ -57,7 +57,7 @@ typedef M4OSA_UInt32 M4OSA_ERR;
* @arg coreID: (IN) [M4OSA_UInt32] CoreID to put in the error code
* @arg errorID: (IN) [M4OSA_UInt32] ErrorID to put in the error code*/
#define M4OSA_ERR_CREATE(severity, coreID, errorID)\
- (M4OSA_Int32)((((M4OSA_UInt32)severity)<<30)+((((M4OSA_UInt32)coreID)&0x003FFF)<<16)+(((M4OSA_UInt32)errorID)&0x00FFFF))
+ (M4OSA_UInt32)((((M4OSA_UInt32)severity)<<30)+((((M4OSA_UInt32)coreID)&0x003FFF)<<16)+(((M4OSA_UInt32)errorID)&0x00FFFF))
/** This macro extracts the 3 fields from the error:
* @arg error: (IN) [M4OSA_ERR] Error code
diff --git a/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp b/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp
index 3c8915a..99cf9ec 100755
--- a/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp
+++ b/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp
@@ -776,16 +776,16 @@ M4OSA_ERR VideoEditor3gpReader_setOption(M4OSA_Context context,
case M4READER_kOptionID_SetOsaFileReaderFctsPtr:
break;
- case M4READER_3GP_kOptionID_AudioOnly:
+ case static_cast<M4OSA_OptionID>(M4READER_3GP_kOptionID_AudioOnly):
break;
- case M4READER_3GP_kOptionID_VideoOnly:
+ case static_cast<M4OSA_OptionID>(M4READER_3GP_kOptionID_VideoOnly):
break;
- case M4READER_3GP_kOptionID_FastOpenMode:
+ case static_cast<M4OSA_OptionID>(M4READER_3GP_kOptionID_FastOpenMode):
break;
- case M4READER_kOptionID_MaxMetadataSize:
+ case static_cast<M4OSA_OptionID>(M4READER_kOptionID_MaxMetadataSize):
break;
default:
diff --git a/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp b/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp
index 9b35d07..e4c7ea1 100755
--- a/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp
+++ b/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp
@@ -809,7 +809,7 @@ M4OSA_ERR VideoEditorAudioDecoder_setOption(M4AD_Context pContext,
pDecoderContext = (VideoEditorAudioDecoder_Context*)pContext;
switch( optionID ) {
- case M4AD_kOptionID_UserParam:
+ case static_cast<M4OSA_UInt32>(M4AD_kOptionID_UserParam):
ALOGV("VideoEditorAudioDecodersetOption UserParam is not supported");
err = M4ERR_NOT_IMPLEMENTED;
break;
diff --git a/media/libeffects/loudness/EffectLoudnessEnhancer.cpp b/media/libeffects/loudness/EffectLoudnessEnhancer.cpp
index 91ed677..3c2b320 100644
--- a/media/libeffects/loudness/EffectLoudnessEnhancer.cpp
+++ b/media/libeffects/loudness/EffectLoudnessEnhancer.cpp
@@ -453,13 +453,13 @@ const struct effect_interface_s gLEInterface = {
// This is the only symbol that needs to be exported
__attribute__ ((visibility ("default")))
audio_effect_library_t AUDIO_EFFECT_LIBRARY_INFO_SYM = {
- tag : AUDIO_EFFECT_LIBRARY_TAG,
- version : EFFECT_LIBRARY_API_VERSION,
- name : "Loudness Enhancer Library",
- implementor : "The Android Open Source Project",
- create_effect : LELib_Create,
- release_effect : LELib_Release,
- get_descriptor : LELib_GetDescriptor,
+ .tag = AUDIO_EFFECT_LIBRARY_TAG,
+ .version = EFFECT_LIBRARY_API_VERSION,
+ .name = "Loudness Enhancer Library",
+ .implementor = "The Android Open Source Project",
+ .create_effect = LELib_Create,
+ .release_effect = LELib_Release,
+ .get_descriptor = LELib_GetDescriptor,
};
}; // extern "C"
diff --git a/media/libeffects/proxy/EffectProxy.cpp b/media/libeffects/proxy/EffectProxy.cpp
index dd4ad08..e6faaaf 100644
--- a/media/libeffects/proxy/EffectProxy.cpp
+++ b/media/libeffects/proxy/EffectProxy.cpp
@@ -329,11 +329,11 @@ int Effect_getDescriptor(effect_handle_t self,
__attribute__ ((visibility ("default")))
audio_effect_library_t AUDIO_EFFECT_LIBRARY_INFO_SYM = {
- tag : AUDIO_EFFECT_LIBRARY_TAG,
- version : EFFECT_LIBRARY_API_VERSION,
- name : "Effect Proxy",
- implementor : "AOSP",
- create_effect : android::EffectProxyCreate,
- release_effect : android::EffectProxyRelease,
- get_descriptor : android::EffectProxyGetDescriptor,
+ .tag = AUDIO_EFFECT_LIBRARY_TAG,
+ .version = EFFECT_LIBRARY_API_VERSION,
+ .name = "Effect Proxy",
+ .implementor = "AOSP",
+ .create_effect = android::EffectProxyCreate,
+ .release_effect = android::EffectProxyRelease,
+ .get_descriptor = android::EffectProxyGetDescriptor,
};
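
Both hunks above replace the old GNU "field : value" initializer spelling, which newer compilers flag as a non-standard extension, with designated initializers. A small self-contained illustration of the two spellings:

    struct library_info {
        int tag;
        const char* name;
    };

    // GNU extension spelling, as removed above:
    static const struct library_info kOldStyle = { tag : 1, name : "example" };
    // Designated-initializer spelling, as introduced above:
    static const struct library_info kNewStyle = { .tag = 1, .name = "example" };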
diff --git a/media/libmedia/AudioRecord.cpp b/media/libmedia/AudioRecord.cpp
index df99147..e39a475 100644
--- a/media/libmedia/AudioRecord.cpp
+++ b/media/libmedia/AudioRecord.cpp
@@ -692,7 +692,7 @@ ssize_t AudioRecord::read(void* buffer, size_t userSize)
// -------------------------------------------------------------------------
-nsecs_t AudioRecord::processAudioBuffer(const sp<AudioRecordThread>& thread)
+nsecs_t AudioRecord::processAudioBuffer()
{
mLock.lock();
if (mAwaitBoost) {
@@ -954,7 +954,7 @@ status_t AudioRecord::restoreRecord_l(const char *from)
// =========================================================================
-void AudioRecord::DeathNotifier::binderDied(const wp<IBinder>& who)
+void AudioRecord::DeathNotifier::binderDied(const wp<IBinder>& who __unused)
{
sp<AudioRecord> audioRecord = mAudioRecord.promote();
if (audioRecord != 0) {
@@ -966,7 +966,8 @@ void AudioRecord::DeathNotifier::binderDied(const wp<IBinder>& who)
// =========================================================================
AudioRecord::AudioRecordThread::AudioRecordThread(AudioRecord& receiver, bool bCanCallJava)
- : Thread(bCanCallJava), mReceiver(receiver), mPaused(true), mPausedInt(false), mPausedNs(0LL)
+ : Thread(bCanCallJava), mReceiver(receiver), mPaused(true), mPausedInt(false), mPausedNs(0LL),
+ mIgnoreNextPausedInt(false)
{
}
@@ -983,6 +984,10 @@ bool AudioRecord::AudioRecordThread::threadLoop()
// caller will check for exitPending()
return true;
}
+ if (mIgnoreNextPausedInt) {
+ mIgnoreNextPausedInt = false;
+ mPausedInt = false;
+ }
if (mPausedInt) {
if (mPausedNs > 0) {
(void) mMyCond.waitRelative(mMyLock, mPausedNs);
@@ -993,7 +998,7 @@ bool AudioRecord::AudioRecordThread::threadLoop()
return true;
}
}
- nsecs_t ns = mReceiver.processAudioBuffer(this);
+ nsecs_t ns = mReceiver.processAudioBuffer();
switch (ns) {
case 0:
return true;
@@ -1017,12 +1022,7 @@ void AudioRecord::AudioRecordThread::requestExit()
{
// must be in this order to avoid a race condition
Thread::requestExit();
- AutoMutex _l(mMyLock);
- if (mPaused || mPausedInt) {
- mPaused = false;
- mPausedInt = false;
- mMyCond.signal();
- }
+ resume();
}
void AudioRecord::AudioRecordThread::pause()
@@ -1034,6 +1034,7 @@ void AudioRecord::AudioRecordThread::pause()
void AudioRecord::AudioRecordThread::resume()
{
AutoMutex _l(mMyLock);
+ mIgnoreNextPausedInt = true;
if (mPaused || mPausedInt) {
mPaused = false;
mPausedInt = false;
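
The __unused annotation applied to binderDied() above, and to similar overrides in the files below, expands on Android's bionic <sys/cdefs.h> to __attribute__((unused)) and silences -Wunused-parameter for parameters that an override must accept but does not read. A tiny illustration with a hypothetical callback:

    #include <sys/cdefs.h>

    // 'who' must stay in the signature to match the interface, but is never read.
    void onPeerDied(int reason, void* who __unused) {
        (void) reason;  // the cast-to-void idiom is the annotation-free alternative
    }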
diff --git a/media/libmedia/AudioSystem.cpp b/media/libmedia/AudioSystem.cpp
index 8033c2c..4580030 100644
--- a/media/libmedia/AudioSystem.cpp
+++ b/media/libmedia/AudioSystem.cpp
@@ -40,10 +40,10 @@ audio_error_callback AudioSystem::gAudioErrorCallback = NULL;
DefaultKeyedVector<audio_io_handle_t, AudioSystem::OutputDescriptor *> AudioSystem::gOutputs(0);
// Cached values for recording queries, all protected by gLock
-uint32_t AudioSystem::gPrevInSamplingRate = 16000;
-audio_format_t AudioSystem::gPrevInFormat = AUDIO_FORMAT_PCM_16_BIT;
-audio_channel_mask_t AudioSystem::gPrevInChannelMask = AUDIO_CHANNEL_IN_MONO;
-size_t AudioSystem::gInBuffSize = 0;
+uint32_t AudioSystem::gPrevInSamplingRate;
+audio_format_t AudioSystem::gPrevInFormat;
+audio_channel_mask_t AudioSystem::gPrevInChannelMask;
+size_t AudioSystem::gInBuffSize = 0; // zero indicates cache is invalid
// establish binder interface to AudioFlinger service
@@ -190,6 +190,16 @@ String8 AudioSystem::getParameters(audio_io_handle_t ioHandle, const String8& ke
return result;
}
+status_t AudioSystem::setParameters(const String8& keyValuePairs)
+{
+ return setParameters((audio_io_handle_t) 0, keyValuePairs);
+}
+
+String8 AudioSystem::getParameters(const String8& keys)
+{
+ return getParameters((audio_io_handle_t) 0, keys);
+}
+
// convert volume steps to natural log scale
// change this value to change volume scaling
@@ -249,6 +259,11 @@ status_t AudioSystem::getSamplingRate(audio_io_handle_t output,
*samplingRate = outputDesc->samplingRate;
gLock.unlock();
}
+ if (*samplingRate == 0) {
+ ALOGE("AudioSystem::getSamplingRate failed for output %d stream type %d",
+ output, streamType);
+ return BAD_VALUE;
+ }
ALOGV("getSamplingRate() streamType %d, output %d, sampling rate %u", streamType, output,
*samplingRate);
@@ -289,6 +304,11 @@ status_t AudioSystem::getFrameCount(audio_io_handle_t output,
*frameCount = outputDesc->frameCount;
gLock.unlock();
}
+ if (*frameCount == 0) {
+ ALOGE("AudioSystem::getFrameCount failed for output %d stream type %d",
+ output, streamType);
+ return BAD_VALUE;
+ }
ALOGV("getFrameCount() streamType %d, output %d, frameCount %d", streamType, output,
*frameCount);
@@ -349,6 +369,12 @@ status_t AudioSystem::getInputBufferSize(uint32_t sampleRate, audio_format_t for
return PERMISSION_DENIED;
}
inBuffSize = af->getInputBufferSize(sampleRate, format, channelMask);
+ if (inBuffSize == 0) {
+ ALOGE("AudioSystem::getInputBufferSize failed sampleRate %d format %x channelMask %x",
+ sampleRate, format, channelMask);
+ return BAD_VALUE;
+ }
+ // A benign race is possible here: we could overwrite a fresher cache entry
gLock.lock();
// save the request params
gPrevInSamplingRate = sampleRate;
@@ -419,7 +445,7 @@ void AudioSystem::releaseAudioSessionId(int audioSession) {
// ---------------------------------------------------------------------------
-void AudioSystem::AudioFlingerClient::binderDied(const wp<IBinder>& who) {
+void AudioSystem::AudioFlingerClient::binderDied(const wp<IBinder>& who __unused) {
Mutex::Autolock _l(AudioSystem::gLock);
AudioSystem::gAudioFlinger.clear();
@@ -804,7 +830,7 @@ bool AudioSystem::isOffloadSupported(const audio_offload_info_t& info)
// ---------------------------------------------------------------------------
-void AudioSystem::AudioPolicyServiceClient::binderDied(const wp<IBinder>& who) {
+void AudioSystem::AudioPolicyServiceClient::binderDied(const wp<IBinder>& who __unused) {
Mutex::Autolock _l(AudioSystem::gLock);
AudioSystem::gAudioPolicyService.clear();
diff --git a/media/libmedia/AudioTrack.cpp b/media/libmedia/AudioTrack.cpp
index b48cb1f..8954d9f 100644
--- a/media/libmedia/AudioTrack.cpp
+++ b/media/libmedia/AudioTrack.cpp
@@ -44,9 +44,6 @@ status_t AudioTrack::getMinFrameCount(
return BAD_VALUE;
}
- // default to 0 in case of error
- *frameCount = 0;
-
// FIXME merge with similar code in createTrack_l(), except we're missing
// some information here that is available in createTrack_l():
// audio_io_handle_t output
@@ -54,16 +51,20 @@ status_t AudioTrack::getMinFrameCount(
// audio_channel_mask_t channelMask
// audio_output_flags_t flags
uint32_t afSampleRate;
- if (AudioSystem::getOutputSamplingRate(&afSampleRate, streamType) != NO_ERROR) {
- return NO_INIT;
+ status_t status;
+ status = AudioSystem::getOutputSamplingRate(&afSampleRate, streamType);
+ if (status != NO_ERROR) {
+ return status;
}
size_t afFrameCount;
- if (AudioSystem::getOutputFrameCount(&afFrameCount, streamType) != NO_ERROR) {
- return NO_INIT;
+ status = AudioSystem::getOutputFrameCount(&afFrameCount, streamType);
+ if (status != NO_ERROR) {
+ return status;
}
uint32_t afLatency;
- if (AudioSystem::getOutputLatency(&afLatency, streamType) != NO_ERROR) {
- return NO_INIT;
+ status = AudioSystem::getOutputLatency(&afLatency, streamType);
+ if (status != NO_ERROR) {
+ return status;
}
// Ensure that buffer depth covers at least audio hardware latency
@@ -74,6 +75,13 @@ status_t AudioTrack::getMinFrameCount(
*frameCount = (sampleRate == 0) ? afFrameCount * minBufCount :
afFrameCount * minBufCount * sampleRate / afSampleRate;
+ // The formula above should always produce a non-zero value, but return an error
+ // in the unlikely event that it does not, as that's part of the API contract.
+ if (*frameCount == 0) {
+ ALOGE("AudioTrack::getMinFrameCount failed for streamType %d, sampleRate %d",
+ streamType, sampleRate);
+ return BAD_VALUE;
+ }
ALOGV("getMinFrameCount=%d: afFrameCount=%d, minBufCount=%d, afSampleRate=%d, afLatency=%d",
*frameCount, afFrameCount, minBufCount, afSampleRate, afLatency);
return NO_ERROR;
@@ -237,12 +245,14 @@ status_t AudioTrack::set(
streamType = AUDIO_STREAM_MUSIC;
}
+ status_t status;
if (sampleRate == 0) {
- uint32_t afSampleRate;
- if (AudioSystem::getOutputSamplingRate(&afSampleRate, streamType) != NO_ERROR) {
- return NO_INIT;
+ status = AudioSystem::getOutputSamplingRate(&sampleRate, streamType);
+ if (status != NO_ERROR) {
+ ALOGE("Could not get output sample rate for stream type %d; status %d",
+ streamType, status);
+ return status;
}
- sampleRate = afSampleRate;
}
mSampleRate = sampleRate;
@@ -330,7 +340,7 @@ status_t AudioTrack::set(
}
// create the IAudioTrack
- status_t status = createTrack_l(streamType,
+ status = createTrack_l(streamType,
sampleRate,
format,
frameCount,
@@ -447,7 +457,7 @@ void AudioTrack::stop()
return;
}
- if (isOffloaded()) {
+ if (isOffloaded_l()) {
mState = STATE_STOPPING;
} else {
mState = STATE_STOPPED;
@@ -469,7 +479,7 @@ void AudioTrack::stop()
sp<AudioTrackThread> t = mAudioTrackThread;
if (t != 0) {
- if (!isOffloaded()) {
+ if (!isOffloaded_l()) {
t->pause();
}
} else {
@@ -507,7 +517,7 @@ void AudioTrack::flush_l()
mRefreshRemaining = true;
mState = STATE_FLUSHED;
- if (isOffloaded()) {
+ if (isOffloaded_l()) {
mProxy->interrupt();
}
mProxy->flush();
@@ -540,7 +550,7 @@ status_t AudioTrack::setVolume(float left, float right)
mProxy->setVolumeLR((uint32_t(uint16_t(right * 0x1000)) << 16) | uint16_t(left * 0x1000));
- if (isOffloaded()) {
+ if (isOffloaded_l()) {
mAudioTrack->signal();
}
return NO_ERROR;
@@ -604,7 +614,7 @@ uint32_t AudioTrack::getSampleRate() const
// sample rate can be updated during playback by the offloaded decoder so we need to
// query the HAL and update if needed.
// FIXME use Proxy return channel to update the rate from server and avoid polling here
- if (isOffloaded()) {
+ if (isOffloaded_l()) {
if (mOutput != 0) {
uint32_t sampleRate = 0;
status_t status = AudioSystem::getSamplingRate(mOutput, mStreamType, &sampleRate);
@@ -741,7 +751,7 @@ status_t AudioTrack::getPosition(uint32_t *position) const
}
AutoMutex lock(mLock);
- if (isOffloaded()) {
+ if (isOffloaded_l()) {
uint32_t dspFrames = 0;
if (mOutput != 0) {
@@ -1341,7 +1351,7 @@ status_t TimedAudioTrack::setMediaTimeTransform(const LinearTransform& xform,
// -------------------------------------------------------------------------
-nsecs_t AudioTrack::processAudioBuffer(const sp<AudioTrackThread>& thread)
+nsecs_t AudioTrack::processAudioBuffer()
{
// Currently the AudioTrack thread is not created if there are no callbacks.
// Would it ever make sense to run the thread, even without callbacks?
@@ -1379,7 +1389,7 @@ nsecs_t AudioTrack::processAudioBuffer(const sp<AudioTrackThread>& thread)
// for offloaded tracks restoreTrack_l() will just update the sequence and clear
// AudioSystem cache. We should not exit here but after calling the callback so
// that the upper layers can recreate the track
- if (!isOffloaded() || (mSequence == mObservedSequence)) {
+ if (!isOffloaded_l() || (mSequence == mObservedSequence)) {
status_t status = restoreTrack_l("processAudioBuffer");
mLock.unlock();
// Run again immediately, but with a new IAudioTrack
@@ -1666,7 +1676,7 @@ nsecs_t AudioTrack::processAudioBuffer(const sp<AudioTrackThread>& thread)
status_t AudioTrack::restoreTrack_l(const char *from)
{
ALOGW("dead IAudioTrack, %s, creating a new one from %s()",
- isOffloaded() ? "Offloaded" : "PCM", from);
+ isOffloaded_l() ? "Offloaded" : "PCM", from);
++mSequence;
status_t result;
@@ -1674,7 +1684,8 @@ status_t AudioTrack::restoreTrack_l(const char *from)
// output parameters in getOutput_l() and createTrack_l()
AudioSystem::clearAudioConfigCache();
- if (isOffloaded()) {
+ if (isOffloaded_l()) {
+ // FIXME re-creation of offloaded tracks is not yet implemented
return DEAD_OBJECT;
}
@@ -1760,14 +1771,21 @@ status_t AudioTrack::getTimestamp(AudioTimestamp& timestamp)
String8 AudioTrack::getParameters(const String8& keys)
{
- if (mOutput) {
- return AudioSystem::getParameters(mOutput, keys);
+ audio_io_handle_t output = getOutput();
+ if (output != 0) {
+ return AudioSystem::getParameters(output, keys);
} else {
return String8::empty();
}
}
-status_t AudioTrack::dump(int fd, const Vector<String16>& args) const
+bool AudioTrack::isOffloaded() const
+{
+ AutoMutex lock(mLock);
+ return isOffloaded_l();
+}
+
+status_t AudioTrack::dump(int fd, const Vector<String16>& args __unused) const
{
const size_t SIZE = 256;
@@ -1797,7 +1815,7 @@ uint32_t AudioTrack::getUnderrunFrames() const
// =========================================================================
-void AudioTrack::DeathNotifier::binderDied(const wp<IBinder>& who)
+void AudioTrack::DeathNotifier::binderDied(const wp<IBinder>& who __unused)
{
sp<AudioTrack> audioTrack = mAudioTrack.promote();
if (audioTrack != 0) {
@@ -1841,7 +1859,7 @@ bool AudioTrack::AudioTrackThread::threadLoop()
return true;
}
}
- nsecs_t ns = mReceiver.processAudioBuffer(this);
+ nsecs_t ns = mReceiver.processAudioBuffer();
switch (ns) {
case 0:
return true;
diff --git a/media/libmedia/AudioTrackShared.cpp b/media/libmedia/AudioTrackShared.cpp
index caa7900..7a1e207 100644
--- a/media/libmedia/AudioTrackShared.cpp
+++ b/media/libmedia/AudioTrackShared.cpp
@@ -765,7 +765,7 @@ ssize_t StaticAudioTrackServerProxy::pollPosition()
return (ssize_t) position;
}
-status_t StaticAudioTrackServerProxy::obtainBuffer(Buffer* buffer, bool ackFlush)
+status_t StaticAudioTrackServerProxy::obtainBuffer(Buffer* buffer, bool ackFlush __unused)
{
if (mIsShutdown) {
buffer->mFrameCount = 0;
@@ -847,7 +847,7 @@ void StaticAudioTrackServerProxy::releaseBuffer(Buffer* buffer)
buffer->mNonContig = 0;
}
-void StaticAudioTrackServerProxy::tallyUnderrunFrames(uint32_t frameCount)
+void StaticAudioTrackServerProxy::tallyUnderrunFrames(uint32_t frameCount __unused)
{
// Unlike AudioTrackServerProxy::tallyUnderrunFrames() used for streaming tracks,
// we don't have a location to count underrun frames. The underrun frame counter
diff --git a/media/libmedia/IMediaDeathNotifier.cpp b/media/libmedia/IMediaDeathNotifier.cpp
index 9db5b1b..10b4934 100644
--- a/media/libmedia/IMediaDeathNotifier.cpp
+++ b/media/libmedia/IMediaDeathNotifier.cpp
@@ -75,7 +75,7 @@ IMediaDeathNotifier::removeObitRecipient(const wp<IMediaDeathNotifier>& recipien
}
void
-IMediaDeathNotifier::DeathNotifier::binderDied(const wp<IBinder>& who) {
+IMediaDeathNotifier::DeathNotifier::binderDied(const wp<IBinder>& who __unused) {
ALOGW("media server died");
// Need to do this with the lock held
diff --git a/media/libmedia/SoundPool.cpp b/media/libmedia/SoundPool.cpp
index b420c95..98acd1f 100644
--- a/media/libmedia/SoundPool.cpp
+++ b/media/libmedia/SoundPool.cpp
@@ -199,7 +199,7 @@ SoundChannel* SoundPool::findNextChannel(int channelID)
return NULL;
}
-int SoundPool::load(const char* path, int priority)
+int SoundPool::load(const char* path, int priority __unused)
{
ALOGV("load: path=%s, priority=%d", path, priority);
Mutex::Autolock lock(&mLock);
@@ -209,7 +209,7 @@ int SoundPool::load(const char* path, int priority)
return sample->sampleID();
}
-int SoundPool::load(int fd, int64_t offset, int64_t length, int priority)
+int SoundPool::load(int fd, int64_t offset, int64_t length, int priority __unused)
{
ALOGV("load: fd=%d, offset=%lld, length=%lld, priority=%d",
fd, offset, length, priority);
diff --git a/media/libmedia/mediametadataretriever.cpp b/media/libmedia/mediametadataretriever.cpp
index 110b94c..bad2494 100644
--- a/media/libmedia/mediametadataretriever.cpp
+++ b/media/libmedia/mediametadataretriever.cpp
@@ -157,7 +157,7 @@ sp<IMemory> MediaMetadataRetriever::extractAlbumArt()
return mRetriever->extractAlbumArt();
}
-void MediaMetadataRetriever::DeathNotifier::binderDied(const wp<IBinder>& who) {
+void MediaMetadataRetriever::DeathNotifier::binderDied(const wp<IBinder>& who __unused) {
Mutex::Autolock lock(MediaMetadataRetriever::sServiceLock);
MediaMetadataRetriever::sService.clear();
ALOGW("MediaMetadataRetriever server died!");
diff --git a/media/libmedia/mediaplayer.cpp b/media/libmedia/mediaplayer.cpp
index 0f6d897..7a6f31d 100644
--- a/media/libmedia/mediaplayer.cpp
+++ b/media/libmedia/mediaplayer.cpp
@@ -654,7 +654,7 @@ status_t MediaPlayer::setRetransmitEndpoint(const char* addrString,
return BAD_VALUE;
}
- memset(&mRetransmitEndpoint, 0, sizeof(&mRetransmitEndpoint));
+ memset(&mRetransmitEndpoint, 0, sizeof(mRetransmitEndpoint));
mRetransmitEndpoint.sin_family = AF_INET;
mRetransmitEndpoint.sin_addr = saddr;
mRetransmitEndpoint.sin_port = htons(port);
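
The one-line mediaplayer.cpp fix above corrects a classic sizeof-of-pointer bug: sizeof(&mRetransmitEndpoint) yields the size of a pointer, not of the structure being cleared, so most of the struct was left uninitialized. A standalone illustration of the same mistake:

    #include <string.h>
    #include <netinet/in.h>

    void clearEndpoint() {
        struct sockaddr_in endpoint;
        memset(&endpoint, 0, sizeof(&endpoint));  // wrong: clears only 4 or 8 bytes (pointer size)
        memset(&endpoint, 0, sizeof(endpoint));   // correct: clears the whole sockaddr_in
    }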
diff --git a/media/libnbaio/MonoPipe.cpp b/media/libnbaio/MonoPipe.cpp
index de0ad28..3c61b60 100644
--- a/media/libnbaio/MonoPipe.cpp
+++ b/media/libnbaio/MonoPipe.cpp
@@ -183,7 +183,7 @@ ssize_t MonoPipe::write(const void *buffer, size_t count)
}
}
if (ns > 0) {
- const struct timespec req = {0, ns};
+ const struct timespec req = {0, static_cast<long>(ns)};
nanosleep(&req, NULL);
}
// record the time that this write() completed
diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp
index 9b36b6a..9c89e82 100644
--- a/media/libstagefright/MPEG4Extractor.cpp
+++ b/media/libstagefright/MPEG4Extractor.cpp
@@ -2372,6 +2372,58 @@ status_t MPEG4Extractor::verifyTrack(Track *track) {
return OK;
}
+typedef enum {
+ //AOT_NONE = -1,
+ //AOT_NULL_OBJECT = 0,
+ //AOT_AAC_MAIN = 1, /**< Main profile */
+ AOT_AAC_LC = 2, /**< Low Complexity object */
+ //AOT_AAC_SSR = 3,
+ //AOT_AAC_LTP = 4,
+ AOT_SBR = 5,
+ //AOT_AAC_SCAL = 6,
+ //AOT_TWIN_VQ = 7,
+ //AOT_CELP = 8,
+ //AOT_HVXC = 9,
+ //AOT_RSVD_10 = 10, /**< (reserved) */
+ //AOT_RSVD_11 = 11, /**< (reserved) */
+ //AOT_TTSI = 12, /**< TTSI Object */
+ //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */
+ //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */
+ //AOT_GEN_MIDI = 15, /**< General MIDI object */
+ //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
+ AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */
+ //AOT_RSVD_18 = 18, /**< (reserved) */
+ //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */
+ AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */
+ //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */
+ AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */
+ AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */
+ //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */
+ //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */
+ //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */
+ //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */
+ //AOT_RSVD_28 = 28, /**< might become SSC */
+ AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */
+ //AOT_MPEGS = 30, /**< MPEG Surround */
+
+ AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */
+
+ //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */
+ //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */
+ //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */
+ //AOT_RSVD_35 = 35, /**< might become DST */
+ //AOT_RSVD_36 = 36, /**< might become ALS */
+ //AOT_AAC_SLS = 37, /**< AAC + SLS */
+ //AOT_SLS = 38, /**< SLS */
+ //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */
+
+ //AOT_USAC = 42, /**< USAC */
+ //AOT_SAOC = 43, /**< SAOC */
+ //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */
+
+ //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */
+} AUDIO_OBJECT_TYPE;
+
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
const void *esds_data, size_t esds_size) {
ESDS esds(esds_data, esds_size);
@@ -2419,6 +2471,11 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
return ERROR_MALFORMED;
}
+ static uint32_t kSamplingRate[] = {
+ 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
+ 16000, 12000, 11025, 8000, 7350
+ };
+
ABitReader br(csd, csd_size);
uint32_t objectType = br.getBits(5);
@@ -2426,6 +2483,9 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
objectType = 32 + br.getBits(6);
}
+ //keep AOT type
+ mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
+
uint32_t freqIndex = br.getBits(4);
int32_t sampleRate = 0;
@@ -2438,28 +2498,134 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
numChannels = br.getBits(4);
} else {
numChannels = br.getBits(4);
- if (objectType == 5) {
- // SBR specific config per 14496-3 table 1.13
- freqIndex = br.getBits(4);
- if (freqIndex == 15) {
- if (csd_size < 8) {
- return ERROR_MALFORMED;
- }
- sampleRate = br.getBits(24);
- }
+
+ if (freqIndex == 13 || freqIndex == 14) {
+ return ERROR_MALFORMED;
}
- if (sampleRate == 0) {
- static uint32_t kSamplingRate[] = {
- 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
- 16000, 12000, 11025, 8000, 7350
- };
+ sampleRate = kSamplingRate[freqIndex];
+ }
- if (freqIndex == 13 || freqIndex == 14) {
+ if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
+ uint32_t extFreqIndex = br.getBits(4);
+ int32_t extSampleRate;
+ if (extFreqIndex == 15) {
+ if (csd_size < 8) {
+ return ERROR_MALFORMED;
+ }
+ extSampleRate = br.getBits(24);
+ } else {
+ if (extFreqIndex == 13 || extFreqIndex == 14) {
return ERROR_MALFORMED;
}
+ extSampleRate = kSamplingRate[extFreqIndex];
+ }
+ //TODO: save the extension sampling rate value in meta data =>
+ // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
+ }
+
+ switch (numChannels) {
+ // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
+ case 0:
+ case 1:// FC
+ case 2:// FL FR
+ case 3:// FC, FL FR
+ case 4:// FC, FL FR, RC
+ case 5:// FC, FL FR, SL SR
+ case 6:// FC, FL FR, SL SR, LFE
+ //numChannels already contains the right value
+ break;
+ case 11:// FC, FL FR, SL SR, RC, LFE
+ numChannels = 7;
+ break;
+ case 7: // FC, FCL FCR, FL FR, SL SR, LFE
+ case 12:// FC, FL FR, SL SR, RL RR, LFE
+ case 14:// FC, FL FR, SL SR, LFE, FHL FHR
+ numChannels = 8;
+ break;
+ default:
+ return ERROR_UNSUPPORTED;
+ }
+
+ {
+ if (objectType == AOT_SBR || objectType == AOT_PS) {
+ const int32_t extensionSamplingFrequency = br.getBits(4);
+ objectType = br.getBits(5);
+
+ if (objectType == AOT_ESCAPE) {
+ objectType = 32 + br.getBits(6);
+ }
+ }
+ if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
+ objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
+ objectType == AOT_ER_BSAC) {
+ const int32_t frameLengthFlag = br.getBits(1);
+
+ const int32_t dependsOnCoreCoder = br.getBits(1);
+
+ if (dependsOnCoreCoder ) {
+ const int32_t coreCoderDelay = br.getBits(14);
+ }
+
+ const int32_t extensionFlag = br.getBits(1);
+
+ if (numChannels == 0 ) {
+ int32_t channelsEffectiveNum = 0;
+ int32_t channelsNum = 0;
+ const int32_t ElementInstanceTag = br.getBits(4);
+ const int32_t Profile = br.getBits(2);
+ const int32_t SamplingFrequencyIndex = br.getBits(4);
+ const int32_t NumFrontChannelElements = br.getBits(4);
+ const int32_t NumSideChannelElements = br.getBits(4);
+ const int32_t NumBackChannelElements = br.getBits(4);
+ const int32_t NumLfeChannelElements = br.getBits(2);
+ const int32_t NumAssocDataElements = br.getBits(3);
+ const int32_t NumValidCcElements = br.getBits(4);
+
+ const int32_t MonoMixdownPresent = br.getBits(1);
+ if (MonoMixdownPresent != 0) {
+ const int32_t MonoMixdownElementNumber = br.getBits(4);
+ }
+
+ const int32_t StereoMixdownPresent = br.getBits(1);
+ if (StereoMixdownPresent != 0) {
+ const int32_t StereoMixdownElementNumber = br.getBits(4);
+ }
+
+ const int32_t MatrixMixdownIndexPresent = br.getBits(1);
+ if (MatrixMixdownIndexPresent != 0) {
+ const int32_t MatrixMixdownIndex = br.getBits(2);
+ const int32_t PseudoSurroundEnable = br.getBits(1);
+ }
- sampleRate = kSamplingRate[freqIndex];
+ int i;
+ for (i=0; i < NumFrontChannelElements; i++) {
+ const int32_t FrontElementIsCpe = br.getBits(1);
+ const int32_t FrontElementTagSelect = br.getBits(4);
+ channelsNum += FrontElementIsCpe ? 2 : 1;
+ }
+
+ for (i=0; i < NumSideChannelElements; i++) {
+ const int32_t SideElementIsCpe = br.getBits(1);
+ const int32_t SideElementTagSelect = br.getBits(4);
+ channelsNum += SideElementIsCpe ? 2 : 1;
+ }
+
+ for (i=0; i < NumBackChannelElements; i++) {
+ const int32_t BackElementIsCpe = br.getBits(1);
+ const int32_t BackElementTagSelect = br.getBits(4);
+ channelsNum += BackElementIsCpe ? 2 : 1;
+ }
+ channelsEffectiveNum = channelsNum;
+
+ for (i=0; i < NumLfeChannelElements; i++) {
+ const int32_t LfeElementTagSelect = br.getBits(4);
+ channelsNum += 1;
+ }
+ ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
+ ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
+ numChannels = channelsNum;
+ }
}
}
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index 9041c21..216a329 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -562,6 +562,17 @@ bool canOffloadStream(const sp<MetaData>& meta, bool hasVideo,
return false;
}
+ // check whether it is ELD/LD content -> no offloading
+ // FIXME: this should depend on audio DSP capabilities. mapMimeToAudioFormat() should use the
+ // metadata to refine the AAC format and the audio HAL should only list supported profiles.
+ int32_t aacaot = -1;
+ if (meta->findInt32(kKeyAACAOT, &aacaot)) {
+ if (aacaot == 23 || aacaot == 39 ) {
+ ALOGV("track of type '%s' is ELD/LD content", mime);
+ return false;
+ }
+ }
+
int32_t srate = -1;
if (!meta->findInt32(kKeySampleRate, &srate)) {
ALOGV("track of type '%s' does not publish sample rate", mime);
diff --git a/media/libstagefright/codecs/aacdec/SoftAAC2.cpp b/media/libstagefright/codecs/aacdec/SoftAAC2.cpp
index f842e27..2f5eff4 100644
--- a/media/libstagefright/codecs/aacdec/SoftAAC2.cpp
+++ b/media/libstagefright/codecs/aacdec/SoftAAC2.cpp
@@ -30,7 +30,7 @@
#define DRC_DEFAULT_MOBILE_REF_LEVEL 64 /* 64*-0.25dB = -16 dB below full scale for mobile conf */
#define DRC_DEFAULT_MOBILE_DRC_CUT 127 /* maximum compression of dynamic range for mobile conf */
#define DRC_DEFAULT_MOBILE_DRC_BOOST 127 /* maximum compression of dynamic range for mobile conf */
-#define MAX_CHANNEL_COUNT 6 /* maximum number of audio channels that can be decoded */
+#define MAX_CHANNEL_COUNT 8 /* maximum number of audio channels that can be decoded */
// names of properties that can be used to override the default DRC settings
#define PROP_DRC_OVERRIDE_REF_LEVEL "aac_drc_reference_level"
#define PROP_DRC_OVERRIDE_CUT "aac_drc_cut"
@@ -296,8 +296,11 @@ void SoftAAC2::maybeConfigureDownmix() const {
if (!(property_get("media.aac_51_output_enabled", value, NULL) &&
(!strcmp(value, "1") || !strcasecmp(value, "true")))) {
ALOGI("Downmixing multichannel AAC to stereo");
- aacDecoder_SetParam(mAACDecoder, AAC_PCM_OUTPUT_CHANNELS, 2);
+ aacDecoder_SetParam(mAACDecoder, AAC_PCM_MAX_OUTPUT_CHANNELS, 2);
mStreamInfo->numChannels = 2;
+ // By default, the decoder creates a 5.1 channel downmix signal
+ // for seven and eight channel input streams. To enable 6.1 and 7.1 channel output
+ // use aacDecoder_SetParam(mAACDecoder, AAC_PCM_MAX_OUTPUT_CHANNELS, -1)
}
}
}
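
As the added comment notes, the FDK decoder produces a 5.1 downmix for 7- and 8-channel input by default; a build that wants the native layout preserved would pass -1 instead of 2. A hedged sketch, assuming an initialized HANDLE_AACDECODER named aacDecoder:

    AAC_DECODER_ERROR err = aacDecoder_SetParam(
            aacDecoder, AAC_PCM_MAX_OUTPUT_CHANNELS, -1 /* keep the native channel count */);
    if (err != AAC_DEC_OK) {
        ALOGW("aacDecoder_SetParam(AAC_PCM_MAX_OUTPUT_CHANNELS) failed: 0x%x", err);
    }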
diff --git a/media/libstagefright/httplive/M3UParser.cpp b/media/libstagefright/httplive/M3UParser.cpp
index 243888c..dd248cb 100644
--- a/media/libstagefright/httplive/M3UParser.cpp
+++ b/media/libstagefright/httplive/M3UParser.cpp
@@ -608,7 +608,7 @@ status_t M3UParser::parseMetaDataDuration(
if (meta->get() == NULL) {
*meta = new AMessage;
}
- (*meta)->setInt64(key, (int64_t)x * 1E6);
+ (*meta)->setInt64(key, (int64_t)(x * 1E6));
return OK;
}
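
The M3UParser change above moves the int64_t cast outside the multiplication so that fractional-second durations survive the conversion to microseconds. A worked example with an assumed duration value:

    double x = 3.5;                       // segment duration in seconds
    int64_t before = (int64_t)x * 1E6;    // truncates first: 3 * 1E6 -> 3000000
    int64_t after  = (int64_t)(x * 1E6);  // multiplies first: -> 3500000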
diff --git a/media/libstagefright/httplive/PlaylistFetcher.cpp b/media/libstagefright/httplive/PlaylistFetcher.cpp
index 1754bf2..f095987 100644
--- a/media/libstagefright/httplive/PlaylistFetcher.cpp
+++ b/media/libstagefright/httplive/PlaylistFetcher.cpp
@@ -861,12 +861,13 @@ status_t PlaylistFetcher::extractAndQueueAccessUnits(
&& source->dequeueAccessUnit(&accessUnit) == OK) {
// Note that we do NOT dequeue any discontinuities.
+ // for simplicity, store a reference to the format in each unit
+ sp<MetaData> format = source->getFormat();
+ if (format != NULL) {
+ accessUnit->meta()->setObject("format", format);
+ }
packetSource->queueAccessUnit(accessUnit);
}
-
- if (packetSource->getFormat() == NULL) {
- packetSource->setFormat(source->getFormat());
- }
}
return OK;
diff --git a/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp b/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp
index 3153c8b..52fb2a5 100644
--- a/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp
+++ b/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp
@@ -70,7 +70,27 @@ status_t AnotherPacketSource::stop() {
}
sp<MetaData> AnotherPacketSource::getFormat() {
- return mFormat;
+ Mutex::Autolock autoLock(mLock);
+ if (mFormat != NULL) {
+ return mFormat;
+ }
+
+ List<sp<ABuffer> >::iterator it = mBuffers.begin();
+ while (it != mBuffers.end()) {
+ sp<ABuffer> buffer = *it;
+ int32_t discontinuity;
+ if (buffer->meta()->findInt32("discontinuity", &discontinuity)) {
+ break;
+ }
+
+ sp<RefBase> object;
+ if (buffer->meta()->findObject("format", &object)) {
+ return static_cast<MetaData*>(object.get());
+ }
+
+ ++it;
+ }
+ return NULL;
}
status_t AnotherPacketSource::dequeueAccessUnit(sp<ABuffer> *buffer) {
@@ -94,6 +114,11 @@ status_t AnotherPacketSource::dequeueAccessUnit(sp<ABuffer> *buffer) {
return INFO_DISCONTINUITY;
}
+ sp<RefBase> object;
+ if ((*buffer)->meta()->findObject("format", &object)) {
+ mFormat = static_cast<MetaData*>(object.get());
+ }
+
return OK;
}
@@ -120,17 +145,22 @@ status_t AnotherPacketSource::read(
}
return INFO_DISCONTINUITY;
- } else {
- int64_t timeUs;
- CHECK(buffer->meta()->findInt64("timeUs", &timeUs));
+ }
- MediaBuffer *mediaBuffer = new MediaBuffer(buffer);
+ sp<RefBase> object;
+ if (buffer->meta()->findObject("format", &object)) {
+ mFormat = static_cast<MetaData*>(object.get());
+ }
- mediaBuffer->meta_data()->setInt64(kKeyTime, timeUs);
+ int64_t timeUs;
+ CHECK(buffer->meta()->findInt64("timeUs", &timeUs));
- *out = mediaBuffer;
- return OK;
- }
+ MediaBuffer *mediaBuffer = new MediaBuffer(buffer);
+
+ mediaBuffer->meta_data()->setInt64(kKeyTime, timeUs);
+
+ *out = mediaBuffer;
+ return OK;
}
return mEOSResult;
diff --git a/services/audioflinger/Android.mk b/services/audioflinger/Android.mk
index 3ec9285..4524d3c 100644
--- a/services/audioflinger/Android.mk
+++ b/services/audioflinger/Android.mk
@@ -23,7 +23,8 @@ LOCAL_SRC_FILES:= \
AudioPolicyService.cpp \
ServiceUtilities.cpp \
AudioResamplerCubic.cpp.arm \
- AudioResamplerSinc.cpp.arm
+ AudioResamplerSinc.cpp.arm \
+ AudioResamplerDyn.cpp.arm
LOCAL_SRC_FILES += StateQueue.cpp
@@ -74,10 +75,11 @@ include $(BUILD_SHARED_LIBRARY)
include $(CLEAR_VARS)
LOCAL_SRC_FILES:= \
- test-resample.cpp \
+ test-resample.cpp \
AudioResampler.cpp.arm \
- AudioResamplerCubic.cpp.arm \
- AudioResamplerSinc.cpp.arm
+ AudioResamplerCubic.cpp.arm \
+ AudioResamplerSinc.cpp.arm \
+ AudioResamplerDyn.cpp.arm
LOCAL_C_INCLUDES := \
$(call include-path-for, audio-utils)
diff --git a/services/audioflinger/AudioPolicyService.cpp b/services/audioflinger/AudioPolicyService.cpp
index c5ad2c0..c8f0730 100644
--- a/services/audioflinger/AudioPolicyService.cpp
+++ b/services/audioflinger/AudioPolicyService.cpp
@@ -1441,6 +1441,14 @@ status_t AudioPolicyService::loadPreProcessorConfig(const char *path)
loadEffects(root, effects);
loadInputSources(root, effects);
+ // Delete the effects to fix a memory leak: 'effects' is a local container of
+ // heap-allocated entries, so valgrind reports them as leaked. The leak only occurs
+ // during mediaserver init, but free them anyway in case mediaserver restarts.
+ size_t i;
+ for (i = 0; i < effects.size(); i++) {
+ delete effects[i];
+ }
+
config_free(root);
free(root);
free(data);
diff --git a/services/audioflinger/AudioResampler.cpp b/services/audioflinger/AudioResampler.cpp
index 323f1a4..3b5a8c1 100644
--- a/services/audioflinger/AudioResampler.cpp
+++ b/services/audioflinger/AudioResampler.cpp
@@ -25,6 +25,7 @@
#include "AudioResampler.h"
#include "AudioResamplerSinc.h"
#include "AudioResamplerCubic.h"
+#include "AudioResamplerDyn.h"
#ifdef __arm__
#include <machine/cpu-features.h>
@@ -85,6 +86,9 @@ bool AudioResampler::qualityIsSupported(src_quality quality)
case MED_QUALITY:
case HIGH_QUALITY:
case VERY_HIGH_QUALITY:
+ case DYN_LOW_QUALITY:
+ case DYN_MED_QUALITY:
+ case DYN_HIGH_QUALITY:
return true;
default:
return false;
@@ -105,7 +109,7 @@ void AudioResampler::init_routine()
if (*endptr == '\0') {
defaultQuality = (src_quality) l;
ALOGD("forcing AudioResampler quality to %d", defaultQuality);
- if (defaultQuality < DEFAULT_QUALITY || defaultQuality > VERY_HIGH_QUALITY) {
+ if (defaultQuality < DEFAULT_QUALITY || defaultQuality > DYN_HIGH_QUALITY) {
defaultQuality = DEFAULT_QUALITY;
}
}
@@ -125,6 +129,12 @@ uint32_t AudioResampler::qualityMHz(src_quality quality)
return 20;
case VERY_HIGH_QUALITY:
return 34;
+ case DYN_LOW_QUALITY:
+ return 4;
+ case DYN_MED_QUALITY:
+ return 6;
+ case DYN_HIGH_QUALITY:
+ return 12;
}
}
@@ -175,6 +185,15 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount,
case VERY_HIGH_QUALITY:
quality = HIGH_QUALITY;
break;
+ case DYN_LOW_QUALITY:
+ atFinalQuality = true;
+ break;
+ case DYN_MED_QUALITY:
+ quality = DYN_LOW_QUALITY;
+ break;
+ case DYN_HIGH_QUALITY:
+ quality = DYN_MED_QUALITY;
+ break;
}
}
pthread_mutex_unlock(&mutex);
@@ -200,6 +219,12 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount,
ALOGV("Create VERY_HIGH_QUALITY sinc Resampler = %d", quality);
resampler = new AudioResamplerSinc(bitDepth, inChannelCount, sampleRate, quality);
break;
+ case DYN_LOW_QUALITY:
+ case DYN_MED_QUALITY:
+ case DYN_HIGH_QUALITY:
+ ALOGV("Create dynamic Resampler = %d", quality);
+ resampler = new AudioResamplerDyn(bitDepth, inChannelCount, sampleRate, quality);
+ break;
}
// initialize resampler
diff --git a/services/audioflinger/AudioResampler.h b/services/audioflinger/AudioResampler.h
index 33e64ce..c341325 100644
--- a/services/audioflinger/AudioResampler.h
+++ b/services/audioflinger/AudioResampler.h
@@ -41,6 +41,9 @@ public:
MED_QUALITY=2,
HIGH_QUALITY=3,
VERY_HIGH_QUALITY=4,
+ DYN_LOW_QUALITY=5,
+ DYN_MED_QUALITY=6,
+ DYN_HIGH_QUALITY=7,
};
static AudioResampler* create(int bitDepth, int inChannelCount,
diff --git a/services/audioflinger/AudioResamplerDyn.cpp b/services/audioflinger/AudioResamplerDyn.cpp
new file mode 100644
index 0000000..984548d
--- /dev/null
+++ b/services/audioflinger/AudioResamplerDyn.cpp
@@ -0,0 +1,551 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "AudioResamplerDyn"
+//#define LOG_NDEBUG 0
+
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <math.h>
+
+#include <cutils/compiler.h>
+#include <cutils/properties.h>
+#include <utils/Log.h>
+
+#include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here
+#include "AudioResamplerFirProcess.h"
+#include "AudioResamplerFirProcessNeon.h"
+#include "AudioResamplerFirGen.h" // requires math.h
+#include "AudioResamplerDyn.h"
+
+//#define DEBUG_RESAMPLER
+
+namespace android {
+
+// generate a unique resample type compile-time constant (constexpr)
+#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \
+ ((((CHANNELS)-1)&1) | !!(LOCKED)<<1 | (COEFTYPE)<<2 \
+ | ((STRIDE)==8 ? 1 : (STRIDE)==16 ? 2 : 0)<<3)
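+// Illustrative worked example (not part of the original macro definition):
+//   RESAMPLETYPE(2, true, 16, 1) == ((2-1)&1) | (1<<1) | (1<<2) | (2<<3)
+//                                == 1 | 2 | 4 | 16 == 23
+// i.e. stereo, locked phase, stride 16, 32-bit coefficients.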
+
+/*
+ * InBuffer is a type agnostic input buffer.
+ *
+ * Layout of the state buffer for halfNumCoefs=8.
+ *
+ * [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr]
+ * S I R
+ *
+ * S = mState
+ * I = mImpulse
+ * R = mRingFull
+ * p = past samples, convolved with the (p)ositive side of sinc()
+ * n = future samples, convolved with the (n)egative side of sinc()
+ * r = extra space for implementing the ring buffer
+ */
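+//
+// Sizing sketch (illustrative, following the resize() math below with the
+// header's kStateSizeMultipleOfFilterLength == 4): for halfNumCoefs == 8 and
+// CHANNELS == 2, stateSize == 8*2*2*4 == 128 samples, mImpulse == mState + 16
+// and mRingFull == mState + 128 - 16.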
+
+template<typename TI>
+AudioResamplerDyn::InBuffer<TI>::InBuffer()
+ : mState(NULL), mImpulse(NULL), mRingFull(NULL), mStateSize(0) {
+}
+
+template<typename TI>
+AudioResamplerDyn::InBuffer<TI>::~InBuffer() {
+ init();
+}
+
+template<typename TI>
+void AudioResamplerDyn::InBuffer<TI>::init() {
+ free(mState);
+ mState = NULL;
+ mImpulse = NULL;
+ mRingFull = NULL;
+ mStateSize = 0;
+}
+
+// resizes the state buffer to accommodate the appropriate filter length
+template<typename TI>
+void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) {
+ // calculate desired state size
+ int stateSize = halfNumCoefs * CHANNELS * 2
+ * kStateSizeMultipleOfFilterLength;
+
+ // check if buffer needs resizing
+ if (mState
+ && stateSize == mStateSize
+ && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) {
+ return;
+ }
+
+ // create new buffer
+ TI* state = (TI*)memalign(32, stateSize*sizeof(*state));
+ memset(state, 0, stateSize*sizeof(*state));
+
+ // attempt to preserve state
+ if (mState) {
+ TI* srcLo = mImpulse - halfNumCoefs*CHANNELS;
+ TI* srcHi = mImpulse + halfNumCoefs*CHANNELS;
+ TI* dst = state;
+
+ if (srcLo < mState) {
+ dst += mState-srcLo;
+ srcLo = mState;
+ }
+ if (srcHi > mState + mStateSize) {
+ srcHi = mState + mStateSize;
+ }
+ memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo));
+ free(mState);
+ }
+
+ // set class member vars
+ mState = state;
+ mStateSize = stateSize;
+ mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed
+ mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS;
+}
+
+// copy in the input data into the head (impulse+halfNumCoefs) of the buffer.
+template<typename TI>
+template<int CHANNELS>
+void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs,
+ const TI* const in, const size_t inputIndex) {
+ TI* head = impulse + halfNumCoefs*CHANNELS;
+ for (size_t i=0 ; i<CHANNELS ; i++) {
+ head[i] = in[inputIndex*CHANNELS + i];
+ }
+}
+
+// advance the impulse pointer, and load in data into the head (impulse+halfNumCoefs)
+template<typename TI>
+template<int CHANNELS>
+void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs,
+ const TI* const in, const size_t inputIndex) {
+ impulse += CHANNELS;
+
+ if (CC_UNLIKELY(impulse >= mRingFull)) {
+ const size_t shiftDown = mRingFull - mState - halfNumCoefs*CHANNELS;
+ memcpy(mState, mState+shiftDown, halfNumCoefs*CHANNELS*2*sizeof(TI));
+ impulse -= shiftDown;
+ }
+ readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
+}
+
+void AudioResamplerDyn::Constants::set(
+ int L, int halfNumCoefs, int inSampleRate, int outSampleRate)
+{
+ int bits = 0;
+ int lscale = inSampleRate/outSampleRate < 2 ? L - 1 :
+ static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate);
+ for (int i=lscale; i; ++bits, i>>=1)
+ ;
+ mL = L;
+ mShift = kNumPhaseBits - bits;
+ mHalfNumCoefs = halfNumCoefs;
+}
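+
+// Worked example for Constants::set (illustrative; kNumPhaseBits comes from the
+// AudioResampler base class): with L == 128 and inSampleRate < 2*outSampleRate,
+// lscale == 127, so the loop counts 7 bits, mShift == kNumPhaseBits - 7, and the
+// phase wrap limit mL << mShift equals 1 << kNumPhaseBits.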
+
+AudioResamplerDyn::AudioResamplerDyn(int bitDepth,
+ int inChannelCount, int32_t sampleRate, src_quality quality)
+ : AudioResampler(bitDepth, inChannelCount, sampleRate, quality),
+ mResampleType(0), mFilterSampleRate(0), mFilterQuality(DEFAULT_QUALITY),
+ mCoefBuffer(NULL)
+{
+ mVolumeSimd[0] = mVolumeSimd[1] = 0;
+ mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better
+}
+
+AudioResamplerDyn::~AudioResamplerDyn() {
+ free(mCoefBuffer);
+}
+
+void AudioResamplerDyn::init() {
+ mFilterSampleRate = 0; // always trigger new filter generation
+ mInBuffer.init();
+}
+
+void AudioResamplerDyn::setVolume(int16_t left, int16_t right) {
+ AudioResampler::setVolume(left, right);
+ mVolumeSimd[0] = static_cast<int32_t>(left)<<16;
+ mVolumeSimd[1] = static_cast<int32_t>(right)<<16;
+}
+
+template <typename T> T max(T a, T b) {return a > b ? a : b;}
+
+template <typename T> T absdiff(T a, T b) {return a > b ? a - b : b - a;}
+
+template<typename T>
+void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten,
+ int inSampleRate, int outSampleRate, double tbwCheat) {
+ T* buf = reinterpret_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T)));
+ static const double atten = 0.9998; // to avoid ripple overflow
+ double fcr;
+ double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten);
+
+ if (inSampleRate < outSampleRate) { // upsample
+ fcr = max(0.5*tbwCheat - tbw/2, tbw/2);
+ } else { // downsample
+ fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2);
+ }
+ // create and set filter
+ firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten);
+ c.setBuf(buf);
+ if (mCoefBuffer) {
+ free(mCoefBuffer);
+ }
+ mCoefBuffer = buf;
+#ifdef DEBUG_RESAMPLER
+ // print basic filter stats
+ printf("L:%d hnc:%d stopBandAtten:%lf fcr:%lf atten:%lf tbw:%lf\n",
+ c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw);
+ // test the filter and report results
+ double fp = (fcr - tbw/2)/c.mL;
+ double fs = (fcr + tbw/2)/c.mL;
+ double passMin, passMax, passRipple;
+ double stopMax, stopRipple;
+ testFir(buf, c.mL, c.mHalfNumCoefs, fp, fs, /*passSteps*/ 1000, /*stopSteps*/ 100000,
+ passMin, passMax, passRipple, stopMax, stopRipple);
+ printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, passMin, passMax, passRipple);
+ printf("stopband(%lf, %lf): %.8lf %.3lf\n", fs, 0.5, stopMax, stopRipple);
+#endif
+}
+
+// recursive gcd. Using objdump, it appears the tail recursion is converted to a while loop.
+static int gcd(int n, int m) {
+ if (m == 0) {
+ return n;
+ }
+ return gcd(m, n % m);
+}
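+
+// Example (illustrative): gcd(48000, 44100) == 300, so setSampleRate() below
+// computes 48000/300 == 160 polyphases when resampling 44100 -> 48000,
+// before the phase-count adjustments that follow.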
+
+static bool isClose(int32_t newSampleRate, int32_t prevSampleRate,
+ int32_t filterSampleRate, int32_t outSampleRate) {
+
+ // different upsampling ratios do not need a filter change.
+ if (filterSampleRate != 0
+ && filterSampleRate < outSampleRate
+ && newSampleRate < outSampleRate)
+ return true;
+
+ // check design criteria again if downsampling is detected.
+ int pdiff = absdiff(newSampleRate, prevSampleRate);
+ int adiff = absdiff(newSampleRate, filterSampleRate);
+
+ // allow up to 6% relative change increments.
+ // allow up to 12% absolute change increments (from filter design)
+ return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3;
+}
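+
+// Worked thresholds (illustrative): with prevSampleRate == filterSampleRate == 44100,
+// isClose() accepts a new rate when pdiff < 44100>>4 == 2756 (~6%) and
+// adiff < 44100>>3 == 5512 (~12.5%).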
+
+void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) {
+ if (mInSampleRate == inSampleRate) {
+ return;
+ }
+ int32_t oldSampleRate = mInSampleRate;
+ int32_t oldHalfNumCoefs = mConstants.mHalfNumCoefs;
+ uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift;
+ bool useS32 = false;
+
+ mInSampleRate = inSampleRate;
+
+ // TODO: Add precalculated Equiripple filters
+
+ if (mFilterQuality != getQuality() ||
+ !isClose(inSampleRate, oldSampleRate, mFilterSampleRate, mSampleRate)) {
+ mFilterSampleRate = inSampleRate;
+ mFilterQuality = getQuality();
+
+ // Begin Kaiser Filter computation
+ //
+ // The quantization floor for S16 is about 96dB - 10*log_10(#length) + 3dB.
+ // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters
+ //
+ // For s32 we keep the stop band attenuation at the same as 16b resolution, about
+ // 96-98dB
+ //
+
+ double stopBandAtten;
+ double tbwCheat = 1.; // how much we "cheat" into aliasing
+ int halfLength;
+ if (mFilterQuality == DYN_HIGH_QUALITY) {
+ // 32b coefficients, 64 length
+ useS32 = true;
+ stopBandAtten = 98.;
+ halfLength = 32;
+ } else if (mFilterQuality == DYN_LOW_QUALITY) {
+ // 16b coefficients, 16-32 length
+ useS32 = false;
+ stopBandAtten = 80.;
+ if (mSampleRate >= inSampleRate * 2) {
+ halfLength = 16;
+ } else {
+ halfLength = 8;
+ }
+ if (mSampleRate >= inSampleRate) {
+ tbwCheat = 1.05;
+ } else {
+ tbwCheat = 1.03;
+ }
+ } else { // DYN_MED_QUALITY
+ // 16b coefficients, 32-64 length
+ // note: > 64 length filters with 16b coefs can have quantization noise problems
+ useS32 = false;
+ stopBandAtten = 84.;
+ if (mSampleRate >= inSampleRate * 4) {
+ halfLength = 32;
+ } else if (mSampleRate >= inSampleRate * 2) {
+ halfLength = 24;
+ } else {
+ halfLength = 16;
+ }
+ if (mSampleRate >= inSampleRate) {
+ tbwCheat = 1.03;
+ } else {
+ tbwCheat = 1.01;
+ }
+ }
+
+ // determine the number of polyphases in the filterbank.
+ // for 16b, it is desirable to have 2^(16/2) = 256 phases.
+ // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html
+ //
+ // We are a bit more lax on this.
+
+ int phases = mSampleRate / gcd(mSampleRate, inSampleRate);
+
+ // TODO: Once dynamic sample rate change is an option, the code below
+ // should be modified to execute only when dynamic sample rate change is enabled.
+ //
+ // as noted above, fewer than 63 phases is too few for accurate linear interpolation.
+ // we increase the phases to compensate, but more phases means more memory per
+ // filter and more time to compute the filter.
+ //
+ // if we knew whether the filter will be used for dynamic sample rate changes,
+ // we could skip this part for fixed sample rate resamplers.
+ //
+ while (phases<63) {
+ phases *= 2; // this code only needed to support dynamic rate changes
+ }
+
+ if (phases>=256) { // too many phases, always interpolate
+ phases = 127;
+ }
+
+ // create the filter
+ mConstants.set(phases, halfLength, inSampleRate, mSampleRate);
+ if (useS32) {
+ createKaiserFir<int32_t>(mConstants, stopBandAtten,
+ inSampleRate, mSampleRate, tbwCheat);
+ } else {
+ createKaiserFir<int16_t>(mConstants, stopBandAtten,
+ inSampleRate, mSampleRate, tbwCheat);
+ }
+ } // End Kaiser filter
+
+ // update phase and state based on the new filter.
+ const Constants& c(mConstants);
+ mInBuffer.resize(mChannelCount, c.mHalfNumCoefs);
+ const uint32_t phaseWrapLimit = c.mL << c.mShift;
+ // try to preserve as much of the phase fraction as possible for on-the-fly changes
+ mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction)
+ * phaseWrapLimit / oldPhaseWrapLimit;
+ mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case.
+ mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit)
+ * inSampleRate / mSampleRate);
+
+ // determine which resampler to use
+ // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits")
+ int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0;
+ int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2;
+ if (locked) {
+ mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase
+ }
+ if (!USE_NEON) {
+ stride = 2; // C version only
+ }
+ // TODO: Remove this for testing
+ //stride = 2;
+ mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32);
+#ifdef DEBUG_RESAMPLER
+ printf("channels:%d %s stride:%d %s coef:%d shift:%d\n",
+ mChannelCount, locked ? "locked" : "interpolated",
+ stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift);
+#endif
+}
+
+void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
+ AudioBufferProvider* provider)
+{
+ // TODO:
+ // 24 cases - this perhaps can be reduced later, as testing might take too long
+ switch (mResampleType) {
+
+ // stride 16 (stride 2 for machines that do not support NEON)
+ case RESAMPLETYPE(1, true, 16, 0):
+ return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(2, true, 16, 0):
+ return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(1, false, 16, 0):
+ return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(2, false, 16, 0):
+ return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(1, true, 16, 1):
+ return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(2, true, 16, 1):
+ return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(1, false, 16, 1):
+ return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(2, false, 16, 1):
+ return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+#if 0
+ // TODO: Remove these?
+ // stride 8
+ case RESAMPLETYPE(1, true, 8, 0):
+ return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(2, true, 8, 0):
+ return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(1, false, 8, 0):
+ return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(2, false, 8, 0):
+ return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(1, true, 8, 1):
+ return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(2, true, 8, 1):
+ return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(1, false, 8, 1):
+ return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(2, false, 8, 1):
+ return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ // stride 2 (can handle any filter length)
+ case RESAMPLETYPE(1, true, 2, 0):
+ return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(2, true, 2, 0):
+ return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(1, false, 2, 0):
+ return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(2, false, 2, 0):
+ return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
+ case RESAMPLETYPE(1, true, 2, 1):
+ return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(2, true, 2, 1):
+ return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(1, false, 2, 1):
+ return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+ case RESAMPLETYPE(2, false, 2, 1):
+ return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
+#endif
+ default:
+ ; // error
+ }
+}
+
+template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
+void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
+ const TC* const coefs, AudioBufferProvider* provider)
+{
+ const Constants& c(mConstants);
+ int16_t* impulse = mInBuffer.getImpulse();
+ size_t inputIndex = mInputIndex;
+ uint32_t phaseFraction = mPhaseFraction;
+ const uint32_t phaseIncrement = mPhaseIncrement;
+ size_t outputIndex = 0;
+ size_t outputSampleCount = outFrameCount * 2; // stereo output
+ size_t inFrameCount = (outFrameCount*mInSampleRate)/mSampleRate;
+ const uint32_t phaseWrapLimit = c.mL << c.mShift;
+
+ // NOTE: be very careful when modifying the code here. register
+ // pressure is very high and a small change might cause the compiler
+ // to generate far less efficient code.
+ // Always sanity check the result with objdump or test-resample.
+
+ // the following logic is a bit convoluted to keep the main processing loop
+ // as tight as possible with register allocation.
+ while (outputIndex < outputSampleCount) {
+ // buffer is empty, fetch a new one
+ while (mBuffer.frameCount == 0) {
+ mBuffer.frameCount = inFrameCount;
+ provider->getNextBuffer(&mBuffer,
+ calculateOutputPTS(outputIndex / 2));
+ if (mBuffer.raw == NULL) {
+ goto resample_exit;
+ }
+ if (phaseFraction >= phaseWrapLimit) { // read in data
+ mInBuffer.readAdvance<CHANNELS>(
+ impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
+ phaseFraction -= phaseWrapLimit;
+ while (phaseFraction >= phaseWrapLimit) {
+ inputIndex++;
+ if (inputIndex >= mBuffer.frameCount) {
+ inputIndex -= mBuffer.frameCount;
+ provider->releaseBuffer(&mBuffer);
+ break;
+ }
+ mInBuffer.readAdvance<CHANNELS>(
+ impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
+ phaseFraction -= phaseWrapLimit;
+ }
+ }
+ }
+ const int16_t* const in = mBuffer.i16;
+ const size_t frameCount = mBuffer.frameCount;
+ const int coefShift = c.mShift;
+ const int halfNumCoefs = c.mHalfNumCoefs;
+ const int32_t* const volumeSimd = mVolumeSimd;
+
+ // reread the last input in.
+ mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
+
+ // main processing loop
+ while (CC_LIKELY(outputIndex < outputSampleCount)) {
+ // caution: fir() is inlined and may be large.
+ // output will be loaded with the appropriate values
+ //
+ // from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs]
+ // from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs.
+ //
+ fir<CHANNELS, LOCKED, STRIDE>(
+ &out[outputIndex],
+ phaseFraction, phaseWrapLimit,
+ coefShift, halfNumCoefs, coefs,
+ impulse, volumeSimd);
+ outputIndex += 2;
+
+ phaseFraction += phaseIncrement;
+ while (phaseFraction >= phaseWrapLimit) {
+ inputIndex++;
+ if (inputIndex >= frameCount) {
+ goto done; // need a new buffer
+ }
+ mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
+ phaseFraction -= phaseWrapLimit;
+ }
+ }
+done:
+ // often arrives here when input buffer runs out
+ if (inputIndex >= frameCount) {
+ inputIndex -= frameCount;
+ provider->releaseBuffer(&mBuffer);
+ // mBuffer.frameCount MUST be zero here.
+ }
+ }
+
+resample_exit:
+ mInBuffer.setImpulse(impulse);
+ mInputIndex = inputIndex;
+ mPhaseFraction = phaseFraction;
+}
+
+// ----------------------------------------------------------------------------
+}; // namespace android
diff --git a/services/audioflinger/AudioResamplerDyn.h b/services/audioflinger/AudioResamplerDyn.h
new file mode 100644
index 0000000..df1fdbe
--- /dev/null
+++ b/services/audioflinger/AudioResamplerDyn.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_AUDIO_RESAMPLER_DYN_H
+#define ANDROID_AUDIO_RESAMPLER_DYN_H
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <cutils/log.h>
+
+#include "AudioResampler.h"
+
+namespace android {
+
+class AudioResamplerDyn: public AudioResampler {
+public:
+ AudioResamplerDyn(int bitDepth, int inChannelCount, int32_t sampleRate,
+ src_quality quality);
+
+ virtual ~AudioResamplerDyn();
+
+ virtual void init();
+
+ virtual void setSampleRate(int32_t inSampleRate);
+
+ virtual void setVolume(int16_t left, int16_t right);
+
+ virtual void resample(int32_t* out, size_t outFrameCount,
+ AudioBufferProvider* provider);
+
+private:
+
+ class Constants { // stores the filter constants.
+ public:
+ Constants() :
+ mL(0), mShift(0), mHalfNumCoefs(0), mFirCoefsS16(NULL)
+ {}
+ void set(int L, int halfNumCoefs,
+ int inSampleRate, int outSampleRate);
+ inline void setBuf(int16_t* buf) {
+ mFirCoefsS16 = buf;
+ }
+ inline void setBuf(int32_t* buf) {
+ mFirCoefsS32 = buf;
+ }
+
+ int mL; // interpolation phases in the filter.
+ int mShift; // right shift to get polyphase index
+ unsigned int mHalfNumCoefs; // filter half #coefs
+ union { // polyphase filter bank
+ const int16_t* mFirCoefsS16;
+ const int32_t* mFirCoefsS32;
+ };
+ };
+
+ // Input buffer management for a given input type TI (currently int16_t).
+ // The buffer is agnostic of the actual type and could also work with int32_t or float.
+ template<typename TI>
+ class InBuffer {
+ public:
+ InBuffer();
+ ~InBuffer();
+ void init();
+ void resize(int CHANNELS, int halfNumCoefs);
+
+ // used for direct management of the mImpulse pointer
+ inline TI* getImpulse() {
+ return mImpulse;
+ }
+ inline void setImpulse(TI *impulse) {
+ mImpulse = impulse;
+ }
+ template<int CHANNELS>
+ inline void readAgain(TI*& impulse, const int halfNumCoefs,
+ const TI* const in, const size_t inputIndex);
+ template<int CHANNELS>
+ inline void readAdvance(TI*& impulse, const int halfNumCoefs,
+ const TI* const in, const size_t inputIndex);
+
+ private:
+ // tuning parameter guidelines: 2 <= multiple <= 8
+ static const int kStateSizeMultipleOfFilterLength = 4;
+
+ TI* mState; // base pointer for the input buffer storage
+ TI* mImpulse; // current location of the impulse response (centered)
+ TI* mRingFull; // mState <= mImpulse < mRingFull
+ // in general, mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS.
+ size_t mStateSize; // in units of TI.
+ };
+
+ template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
+ void resample(int32_t* out, size_t outFrameCount,
+ const TC* const coefs, AudioBufferProvider* provider);
+
+ template<typename T>
+ void createKaiserFir(Constants &c, double stopBandAtten,
+ int inSampleRate, int outSampleRate, double tbwCheat);
+
+ InBuffer<int16_t> mInBuffer;
+ Constants mConstants; // current set of coefficient parameters
+ int32_t __attribute__ ((aligned (8))) mVolumeSimd[2];
+ int32_t mResampleType; // contains the resample type.
+ int32_t mFilterSampleRate; // designed filter sample rate.
+ src_quality mFilterQuality; // designed filter quality.
+ void* mCoefBuffer; // if a filter is created, this is not null
+};
+
+// ----------------------------------------------------------------------------
+}; // namespace android
+
+#endif /*ANDROID_AUDIO_RESAMPLER_DYN_H*/
diff --git a/services/audioflinger/AudioResamplerFirGen.h b/services/audioflinger/AudioResamplerFirGen.h
new file mode 100644
index 0000000..fac3001
--- /dev/null
+++ b/services/audioflinger/AudioResamplerFirGen.h
@@ -0,0 +1,684 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
+#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
+
+namespace android {
+
+/*
+ * generates a sine wave at equal steps.
+ *
+ * As most of our functions use sine or cosine at equal steps,
+ * it is very efficient to compute them that way (single multiply and subtract),
+ * rather than invoking the math library sin() or cos() each time.
+ *
+ * SineGen uses Goertzel's Algorithm (as a generator not a filter)
+ * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
+ * by stepping through 0, 1, ... n.
+ *
+ * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
+ *
+ * or looking at just the imaginary sine term, as the cosine follows identically:
+ *
+ * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
+ *
+ * Goertzel's algorithm is more efficient than the angle addition formula,
+ * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
+ * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
+ * cosine generation due to the complex * complex multiply (full rotation).
+ *
+ * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
+ *
+ */
+
+class SineGen {
+public:
+ SineGen(double wstart, double wstep, bool cosine = false) {
+ if (cosine) {
+ mCurrent = cos(wstart);
+ mPrevious = cos(wstart - wstep);
+ } else {
+ mCurrent = sin(wstart);
+ mPrevious = sin(wstart - wstep);
+ }
+ mTwoCos = 2.*cos(wstep);
+ }
+ SineGen(double expNow, double expPrev, double twoCosStep) {
+ mCurrent = expNow;
+ mPrevious = expPrev;
+ mTwoCos = twoCosStep;
+ }
+ inline double value() const {
+ return mCurrent;
+ }
+ inline void advance() {
+ double tmp = mCurrent;
+ mCurrent = mCurrent*mTwoCos - mPrevious;
+ mPrevious = tmp;
+ }
+ inline double valueAdvance() {
+ double tmp = mCurrent;
+ mCurrent = mCurrent*mTwoCos - mPrevious;
+ mPrevious = tmp;
+ return tmp;
+ }
+
+private:
+ double mCurrent; // current value of sine/cosine
+ double mPrevious; // previous value of sine/cosine
+ double mTwoCos; // stepping factor
+};
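+
+// Hypothetical usage sketch (not part of this patch): stepping sin(w0 + n*dw).
+//
+//   SineGen sg(0.1, 0.05);             // w0 = 0.1, dw = 0.05 (radians)
+//   for (int n = 0; n < 4; ++n) {
+//       double s = sg.valueAdvance();  // sin(0.1 + n*0.05), to within rounding
+//   }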
+
+/*
+ * generates a series of sine generators, phase offset by fixed steps.
+ *
+ * This is used to generate polyphase sine generators, one per polyphase
+ * in the filter code below.
+ *
+ * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
+ * increments by innerStep.
+ *
+ */
+
+class SineGenGen {
+public:
+ SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
+ : mSineInnerCur(outerStart, outerStep, cosine),
+ mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
+ {
+ mTwoCos = 2.*cos(innerStep);
+ }
+ inline SineGen value() {
+ return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
+ }
+ inline void advance() {
+ mSineInnerCur.advance();
+ mSineInnerPrev.advance();
+ }
+ inline SineGen valueAdvance() {
+ return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
+ }
+
+private:
+ SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
+ SineGen mSineInnerPrev; // generate the inner sine previous values
+ // (behind by innerStep, stepped by outerStep).
+ double mTwoCos; // the inner stepping factor for the returned SineGen.
+};
+
+static inline double sqr(double x) {
+ return x * x;
+}
+
+/*
+ * rounds a double to the nearest integer for FIR coefficients.
+ *
+ * One variant uses noise shaping, which must keep error history
+ * to work (the err parameter, initialized to 0).
+ * The other variant is a non-noise shaped version for
+ * S32 coefficients (noise shaping doesn't gain much).
+ *
+ * Caution: No bounds saturation is applied, but it isn't needed in this case.
+ *
+ * @param x is the value to round.
+ *
+ * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
+ * Typically this may be the maximum positive integer+1 (using the fact that double precision
+ * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
+ *
+ * @param err is the previous error (actual - rounded) for the previous rounding op.
+ * For 16b coefficients this can improve stopband dB performance by up to 2dB.
+ *
+ * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
+ *
+ */
+
+static inline int64_t toint(double x, int64_t maxval, double& err) {
+ double val = x * maxval;
+ double ival = floor(val + 0.5 + err*0.2);
+ err = val - ival;
+ return static_cast<int64_t>(ival);
+}
+
+static inline int64_t toint(double x, int64_t maxval) {
+ return static_cast<int64_t>(floor(x * maxval + 0.5));
+}
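+
+// Worked example (illustrative): toint(0.5, 1LL<<15) == floor(0.5*32768 + 0.5) == 16384;
+// the noise-shaped variant additionally carries err == val - ival into the next call.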
+
+/*
+ * Modified Bessel function of the first kind
+ * http://en.wikipedia.org/wiki/Bessel_function
+ *
+ * The formulas are taken from Abramowitz and Stegun,
+ * _Handbook of Mathematical Functions_ (links below):
+ *
+ * http://people.math.sfu.ca/~cbm/aands/page_375.htm
+ * http://people.math.sfu.ca/~cbm/aands/page_378.htm
+ *
+ * http://dlmf.nist.gov/10.25
+ * http://dlmf.nist.gov/10.40
+ *
+ * Note we assume x is nonnegative (the function is symmetric,
+ * pass in the absolute value as needed).
+ *
+ * Constants are compile time derived with templates I0Term<> and
+ * I0ATerm<> to the precision of the compiler. The series can be expanded
+ * to any precision needed, but currently set around 24b precision.
+ *
+ * We use a bit of template math here, constexpr would probably be
+ * more appropriate for a C++11 compiler.
+ *
+ * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
+ *
+ */
+
+template <int N>
+struct I0Term {
+ static const double value = I0Term<N-1>::value / (4. * N * N);
+};
+
+template <>
+struct I0Term<0> {
+ static const double value = 1.;
+};
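+
+// The recursion above yields I0Term<N> == 1/(4^N * (N!)^2), the coefficient of
+// x^(2N) in the I0(x) series; e.g. I0Term<1> == 0.25, I0Term<2> == 0.015625.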
+
+template <int N>
+struct I0ATerm {
+ static const double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N);
+};
+
+template <>
+struct I0ATerm<0> { // 1/sqrt(2*PI);
+ static const double value = 0.398942280401432677939946059934381868475858631164934657665925;
+};
+
+#if USE_HORNERS_METHOD
+/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
+ * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
+ *
+ * This has fewer multiplications than Estrin's method below, but has back to back
+ * floating point dependencies.
+ *
+ * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
+ */
+
+inline double Poly2(double A, double B, double x) {
+ return A + x * B;
+}
+
+inline double Poly4(double A, double B, double C, double D, double x) {
+ return A + x * (B + x * (C + x * (D)));
+}
+
+inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
+ double x) {
+ return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
+}
+
+inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
+ double H, double I, double x) {
+ return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
+}
+
+#else
+/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
+ * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
+ *
+ * This is typically faster, perhaps gains about 5-10% overall on ARM processors
+ * over Horner's method above.
+ */
+
+inline double Poly2(double A, double B, double x) {
+ return A + B * x;
+}
+
+inline double Poly3(double A, double B, double C, double x, double x2) {
+ return Poly2(A, B, x) + C * x2;
+}
+
+inline double Poly3(double A, double B, double C, double x) {
+ return Poly2(A, B, x) + C * x * x;
+}
+
+inline double Poly4(double A, double B, double C, double D, double x, double x2) {
+ return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
+}
+
+inline double Poly4(double A, double B, double C, double D, double x) {
+ return Poly4(A, B, C, D, x, x * x);
+}
+
+inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
+ double x) {
+ double x2 = x * x;
+ return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
+}
+
+inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
+ double H, double x, double x2, double x4) {
+ return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
+}
+
+inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
+ double H, double I, double x) {
+ double x2 = x * x;
+#if 1
+ // It does not seem faster to explicitly decompose Poly8 into Poly4, but
+ // could depend on compiler floating point scheduling.
+ double x4 = x2 * x2;
+ return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
+#else
+ double val = Poly4(A, B, C, D, x, x2);
+ double x4 = x2 * x2;
+ return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
+#endif
+}
+#endif
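+
+// Sanity check (illustrative): Poly4(1., 2., 3., 4., 2.) evaluates
+// 1 + 2*2 + 3*4 + 4*8 == 49 under either evaluation scheme.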
+
+static inline double I0(double x) {
+ if (x < 3.75) {
+ x *= x;
+ return Poly7(I0Term<0>::value, I0Term<1>::value,
+ I0Term<2>::value, I0Term<3>::value,
+ I0Term<4>::value, I0Term<5>::value,
+ I0Term<6>::value, x); // e < 1.6e-7
+ }
+ if (1) {
+ /*
+ * Series expansion coefs are easy to calculate, but are expanded around 0,
+ * so error is unequal over the interval 0 < x < 3.75, the error being
+ * significantly better near 0.
+ *
+ * A better solution is to use precise minimax polynomial fits.
+ *
+ * We use a slightly more complicated solution for 3.75 < x < 15, based on
+ * the tables in Blair and Edwards, "Stable Rational Minimax Approximations
+ * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
+ * AECL-4928.
+ *
+ * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
+ *
+ * See Table 11 for 0 < x < 15; e < 10^(-7.13).
+ *
+ * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
+ *
+ * This speeds up overall computation by about 40% over using the else clause below,
+ * which requires sqrt and exp.
+ *
+ */
+
+ x *= x;
+ double num = Poly9(-0.13544938430e9, -0.33153754512e8,
+ -0.19406631946e7, -0.48058318783e5,
+ -0.63269783360e3, -0.49520779070e1,
+ -0.24970910370e-1, -0.74741159550e-4,
+ -0.18257612460e-6, x);
+ double y = x - 225.; // reflection around 15 (squared)
+ double den = Poly4(-0.34598737196e8, 0.23852643181e6,
+ -0.70699387620e3, 0.10000000000e1, y);
+ return num / den;
+
+#if IO_EXTENDED_BETA
+ /* Table 42 for x > 15; e < 10^(-8.11).
+ * This is used for Beta>15, but is disabled here as
+ * we never use Beta that high.
+ *
+ * NOTE: This should be enabled only for x > 15.
+ */
+
+ double y = 1./x;
+ double z = y - (1./15);
+ double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
+ double den = Poly3(0.103150763823e2, -0.14181687413e2,
+ 0.1000000000e1, z);
+ return exp(x) * sqrt(y) * num / den;
+#endif
+ } else {
+ /*
+ * NOT USED, but reference for large Beta.
+ *
+ * Abramowitz and Stegun asymptotic formula.
+ * works for x > 3.75.
+ */
+ double y = 1./x;
+ return exp(x) * sqrt(y) *
+ // note: reciprocal squareroot may be easier!
+ // http://en.wikipedia.org/wiki/Fast_inverse_square_root
+ Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
+ I0ATerm<2>::value, I0ATerm<3>::value,
+ I0ATerm<4>::value, I0ATerm<5>::value,
+ I0ATerm<6>::value, I0ATerm<7>::value,
+ I0ATerm<8>::value, y); // (... e) < 1.9e-7
+ }
+}
+
+/*
+ * calculates the transition bandwidth for a Kaiser filter
+ *
+ * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
+ * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
+ *
+ * @param halfNumCoef is half the number of coefficients per filter phase.
+ *
+ * @param stopBandAtten is the stop band attenuation desired.
+ *
+ * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
+ */
+static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
+ return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef);
+}
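+
+// Worked example (illustrative): halfNumCoef == 16 with stopBandAtten == 84 dB gives
+// tbw == (84 - 7.95)/(28.72*16) ~= 0.165 in normalized frequency.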
+
+/*
+ * calculates the fir transfer response of the overall polyphase filter at w.
+ *
+ * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
+ * fact that h[n] is symmetric (cosines only, no complex arithmetic).
+ *
+ * We use Goertzel's algorithm to accelerate the computation to essentially
+ * a single multiply and 2 adds per filter coefficient h[].
+ *
+ * Be careful to consider that h[n] is the overall polyphase filter,
+ * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
+ * as you only use one of the polyphases at a time.
+ */
+template <typename T>
+static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
+ double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank
+ coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank).
+#if SLOW_FIRTRANSFER
+ /* Original code for reference. This is equivalent to the code below, but slower. */
+ for (int i=1 ; i<=L ; ++i) {
+ for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
+ accum += cos(ix*w)*static_cast<double>(*coef++);
+ }
+ }
+#else
+ /*
+ * Our overall filter is stored striped by polyphases, not a contiguous h[n].
+ * We could fetch coefficients in a non-contiguous fashion
+ * but that will not scale to vector processing.
+ *
+ * We apply Goertzel's algorithm directly to each polyphase filter bank instead of
+ * using cosine generation/multiplication, thereby saving one multiply per inner loop.
+ *
+ * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
+ * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
+ *
+ * We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
+ * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
+ *
+ * y[n] = s[n] - e^(iw)s[n-1]
+ * = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
+ * = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
+ *
+ * The summation contains the frequency steps we want multiplied by the source
+ * (similar to a DTFT).
+ *
+ * Using symmetry, and just the real part (be careful, this must happen
+ * after any internal complex multiplications), the polyphase filterbank
+ * transfer function is:
+ *
+ * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
+ * = Re{ e^(iwn + iw_0) y[n]}
+ * = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
+ *
+ * using the fact that s[n] of real x[n] is real.
+ *
+ */
+ double dcos = 2. * cos(L*w);
+ int start = ((halfNumCoef)*L + 1);
+ SineGen cc((start - L) * w, w, true); // cosine
+ SineGen cp(start * w, w, true); // cosine
+ for (int i=1 ; i<=L ; ++i) {
+ double sc = 0;
+ double sp = 0;
+ for (int j=0 ; j<halfNumCoef ; ++j) {
+ double tmp = sc;
+ sc = static_cast<double>(*coef++) + dcos*sc - sp;
+ sp = tmp;
+ }
+ // If we are awfully clever, we can apply Goertzel's algorithm
+ // again on the sc and sp sequences returned here.
+ accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
+ }
+#endif
+ return accum*2.;
+}
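+
+// Sanity check (illustrative): at w == 0 every cosine term is 1, so firTransfer()
+// reduces to coef[0] + 2*(sum of the remaining half coefficients), i.e. the DC
+// gain of the full odd-length symmetric filter.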
+
+/*
+ * evaluates the minimum and maximum |H(f)| bound in a band region.
+ *
+ * This is usually done with equally spaced increments in the target band in question.
+ * The passband is often very small, and sampled that way. The stopband is often much
+ * larger.
+ *
+ * We use the fact that the overall polyphase filter has an additional bank at the end
+ * for interpolation; hence it is overspecified for the H(f) computation. Thus the
+ * first polyphase is never actually checked, excepting its first term.
+ *
+ * In this code we use the firTransfer() evaluator above, which uses Goertzel's
+ * algorithm to calculate the transfer function at each point.
+ *
+ * TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal
+ * spacing is a chirp transform.
+ *
+ * @param coef is the designed polyphase filter banks
+ *
+ * @param L is the number of phases (for interpolation)
+ *
+ * @param halfNumCoef should be half the number of coefficients for a single
+ * polyphase.
+ *
+ * @param fstart is the normalized frequency start.
+ *
+ * @param fend is the normalized frequency end.
+ *
+ * @param steps is the number of steps to take (sampling) between frequency start and end
+ *
+ * @param firMin returns the minimum transfer |H(f)| found
+ *
+ * @param firMax returns the maximum transfer |H(f)| found
+ *
+ * 0 <= f <= 0.5.
+ * This is used to test passband and stopband performance.
+ */
+template <typename T>
+static void testFir(const T* coef, int L, int halfNumCoef,
+ double fstart, double fend, int steps, double &firMin, double &firMax) {
+ double wstart = fstart*(2.*M_PI);
+ double wend = fend*(2.*M_PI);
+ double wstep = (wend - wstart)/steps;
+ double fmax, fmin;
+ double trf = firTransfer(coef, L, halfNumCoef, wstart);
+ if (trf<0) {
+ trf = -trf;
+ }
+ fmin = fmax = trf;
+ wstart += wstep;
+ for (int i=1; i<steps; ++i) {
+ trf = firTransfer(coef, L, halfNumCoef, wstart);
+ if (trf<0) {
+ trf = -trf;
+ }
+ if (trf>fmax) {
+ fmax = trf;
+ }
+ else if (trf<fmin) {
+ fmin = trf;
+ }
+ wstart += wstep;
+ }
+ // renormalize - this is only needed for integer filter types
+ double norm = 1./((1ULL<<(sizeof(T)*8-1))*L);
+
+ firMin = fmin * norm;
+ firMax = fmax * norm;
+}
+
+/*
+ * evaluates the |H(f)| lowpass band characteristics.
+ *
+ * This function tests the lowpass characteristics for the overall polyphase filter,
+ * and is used to verify the design. For this case, fp should be set to the
+ * passband normalized frequency from 0 to 0.5 for the overall filter (thus it
+ * is the designed polyphase bank value / L). Likewise for fs.
+ *
+ * @param coef is the designed polyphase filter banks
+ *
+ * @param L is the number of phases (for interpolation)
+ *
+ * @param halfNumCoef should be half the number of coefficients for a single
+ * polyphase.
+ *
+ * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
+ *
+ * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
+ *
+ * @param passSteps is the number of passband sampling steps.
+ *
+ * @param stopSteps is the number of stopband sampling steps.
+ *
+ * @param passMin is the minimum value in the passband
+ *
+ * @param passMax is the maximum value in the passband (useful for scaling). This should
+ * be less than 1., to avoid sine wave test overflow.
+ *
+ * @param passRipple is the passband ripple. Typically this should be less than 0.1 for
+ * an audio filter. Generally speaker/headphone device characteristics will dominate
+ * the passband term.
+ *
+ * @param stopMax is the maximum value in the stopband.
+ *
+ * @param stopRipple is the stopband ripple, also known as stopband attenuation.
+ * Typically this should be greater than ~80dB for low quality, and greater than
+ * ~100dB for full 16b quality, otherwise aliasing may become noticeable.
+ *
+ */
+template <typename T>
+static void testFir(const T* coef, int L, int halfNumCoef,
+ double fp, double fs, int passSteps, int stopSteps,
+ double &passMin, double &passMax, double &passRipple,
+ double &stopMax, double &stopRipple) {
+ double fmin, fmax;
+ testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
+ double d1 = (fmax - fmin)/2.;
+ passMin = fmin;
+ passMax = fmax;
+ passRipple = -20.*log10(1. - d1); // passband ripple
+ testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
+ // fmin is really not important for the stopband.
+ stopMax = fmax;
+ stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
+}
+
+/*
+ * Calculates the overall polyphase filter based on a windowed sinc function.
+ *
+ * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1
+ * taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks.
+ * The last filterbank is used for interpolation purposes (and is mostly composed
+ * of the first bank shifted by one sample), and is unnecessary if one does
+ * not do interpolation.
+ *
+ * We use the last filterbank for some transfer function calculation purposes,
+ * so it needs to be generated anyways.
+ *
+ * @param coef is the caller allocated space for coefficients. This should be
+ * exactly (L+1)*halfNumCoef in size.
+ *
+ * @param L is the number of phases (for interpolation)
+ *
+ * @param halfNumCoef should be half the number of coefficients for a single
+ * polyphase.
+ *
+ * @param stopBandAtten is the stopband value, should be >50dB.
+ *
+ * @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy
+ * should be 6dB less. (fcr is where the amplitude drops by half). Use the
+ * firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint
+ * between the stop band and the pass band (fstop+fpass)/2.
+ *
+ * @param atten is the attenuation (generally slightly less than 1).
+ */
+
+template <typename T>
+static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
+ double stopBandAtten, double fcr, double atten) {
+ //
+ // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
+ // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
+ //
+ // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
+ //
+ // Kaiser window and beta parameter
+ //
+ // | 0.1102*(A - 8.7) A > 50
+ // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50
+ // | 0. A < 21
+ //
+ // where A is the desired stop-band attenuation in dBFS
+ //
+ // 30 dB 2.210
+ // 40 dB 3.384
+ // 50 dB 4.538
+ // 60 dB 5.658
+ // 70 dB 6.764
+ // 80 dB 7.865
+ // 90 dB 8.960
+ // 100 dB 10.056
+
+ const int N = L * halfNumCoef; // non-negative half
+ const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
+ const double xstep = (2. * M_PI) * fcr / L;
+ const double xfrac = 1. / N;
+ const double yscale = atten * L / (I0(beta) * M_PI);
+
+ // We use sine generators, which compute sines at regular step intervals.
+ // This speeds up overall computation by about 40% compared with calling sin() directly.
+
+ SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
+
+ for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
+
+ // computation for a single polyphase of the overall filter.
+ SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
+ double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
+
+ for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
+ double y;
+ if (CC_LIKELY(ix)) {
+ double x = static_cast<double>(ix);
+
+ // sine generator: sg.valueAdvance() returns sin(ix*xstep);
+ y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
+ } else {
+ y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
+ sg.advance();
+ }
+
+ // (caution!) float version does not need rounding
+ if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
+ *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err));
+ } else {
+ *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1)));
+ }
+ }
+ }
+}
+
+}; // namespace android
+
+#endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/
diff --git a/services/audioflinger/AudioResamplerFirOps.h b/services/audioflinger/AudioResamplerFirOps.h
new file mode 100644
index 0000000..bf2163f
--- /dev/null
+++ b/services/audioflinger/AudioResamplerFirOps.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_AUDIO_RESAMPLER_FIR_OPS_H
+#define ANDROID_AUDIO_RESAMPLER_FIR_OPS_H
+
+namespace android {
+
+#if defined(__arm__) && !defined(__thumb__)
+#define USE_INLINE_ASSEMBLY (true)
+#else
+#define USE_INLINE_ASSEMBLY (false)
+#endif
+
+#if USE_INLINE_ASSEMBLY && defined(__ARM_NEON__)
+#define USE_NEON (true)
+#include <arm_neon.h>
+#else
+#define USE_NEON (false)
+#endif
+
+template<typename T, typename U>
+struct is_same
+{
+ static const bool value = false;
+};
+
+template<typename T>
+struct is_same<T, T> // partial specialization
+{
+ static const bool value = true;
+};
+
+static inline
+int32_t mulRL(int left, int32_t in, uint32_t vRL)
+{
+#if USE_INLINE_ASSEMBLY
+ int32_t out;
+ if (left) {
+ asm( "smultb %[out], %[in], %[vRL] \n"
+ : [out]"=r"(out)
+ : [in]"%r"(in), [vRL]"r"(vRL)
+ : );
+ } else {
+ asm( "smultt %[out], %[in], %[vRL] \n"
+ : [out]"=r"(out)
+ : [in]"%r"(in), [vRL]"r"(vRL)
+ : );
+ }
+ return out;
+#else
+ int16_t v = left ? static_cast<int16_t>(vRL) : static_cast<int16_t>(vRL>>16);
+ return static_cast<int32_t>((static_cast<int64_t>(in) * v) >> 16);
+#endif
+}
+
+static inline
+int32_t mulAdd(int16_t in, int16_t v, int32_t a)
+{
+#if USE_INLINE_ASSEMBLY
+ int32_t out;
+ asm( "smlabb %[out], %[v], %[in], %[a] \n"
+ : [out]"=r"(out)
+ : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
+ : );
+ return out;
+#else
+ return a + v * in;
+#endif
+}
+
+static inline
+int32_t mulAdd(int16_t in, int32_t v, int32_t a)
+{
+#if USE_INLINE_ASSEMBLY
+ int32_t out;
+ asm( "smlawb %[out], %[v], %[in], %[a] \n"
+ : [out]"=r"(out)
+ : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
+ : );
+ return out;
+#else
+ return a + static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 16);
+#endif
+}
+
+static inline
+int32_t mulAdd(int32_t in, int32_t v, int32_t a)
+{
+#if USE_INLINE_ASSEMBLY
+ int32_t out;
+ asm( "smmla %[out], %[v], %[in], %[a] \n"
+ : [out]"=r"(out)
+ : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
+ : );
+ return out;
+#else
+ return a + static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 32);
+#endif
+}
+
+static inline
+int32_t mulAddRL(int left, uint32_t inRL, int16_t v, int32_t a)
+{
+#if USE_INLINE_ASSEMBLY
+ int32_t out;
+ if (left) {
+ asm( "smlabb %[out], %[v], %[inRL], %[a] \n"
+ : [out]"=r"(out)
+ : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a)
+ : );
+ } else {
+ asm( "smlabt %[out], %[v], %[inRL], %[a] \n"
+ : [out]"=r"(out)
+ : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a)
+ : );
+ }
+ return out;
+#else
+ int16_t s = left ? static_cast<int16_t>(inRL) : static_cast<int16_t>(inRL>>16);
+ return a + v * s;
+#endif
+}
+
+static inline
+int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
+{
+#if USE_INLINE_ASSEMBLY
+ int32_t out;
+ if (left) {
+ asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
+ : [out]"=r"(out)
+ : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a)
+ : );
+ } else {
+ asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
+ : [out]"=r"(out)
+ : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a)
+ : );
+ }
+ return out;
+#else
+ int16_t s = left ? static_cast<int16_t>(inRL) : static_cast<int16_t>(inRL>>16);
+ return a + static_cast<int32_t>((static_cast<int64_t>(v) * s) >> 16);
+#endif
+}
+
+}; // namespace android
+
+#endif /*ANDROID_AUDIO_RESAMPLER_FIR_OPS_H*/
diff --git a/services/audioflinger/AudioResamplerFirProcess.h b/services/audioflinger/AudioResamplerFirProcess.h
new file mode 100644
index 0000000..38e387c
--- /dev/null
+++ b/services/audioflinger/AudioResamplerFirProcess.h
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
+#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
+
+namespace android {
+
+// depends on AudioResamplerFirOps.h
+
+template<int CHANNELS, typename TC>
+static inline
+void mac(
+ int32_t& l, int32_t& r,
+ const TC coef,
+ const int16_t* samples)
+{
+ if (CHANNELS == 2) {
+ uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
+ l = mulAddRL(1, rl, coef, l);
+ r = mulAddRL(0, rl, coef, r);
+ } else {
+ r = l = mulAdd(samples[0], coef, l);
+ }
+}
+
+template<int CHANNELS, typename TC>
+static inline
+void interpolate(
+ int32_t& l, int32_t& r,
+ const TC coef_0, const TC coef_1,
+ const int16_t lerp, const int16_t* samples)
+{
+ TC sinc;
+
+ if (is_same<TC, int16_t>::value) {
+ sinc = (lerp * ((coef_1-coef_0)<<1)>>16) + coef_0;
+ } else {
+ sinc = mulAdd(lerp, (coef_1-coef_0)<<1, coef_0);
+ }
+ if (CHANNELS == 2) {
+ uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
+ l = mulAddRL(1, rl, sinc, l);
+ r = mulAddRL(0, rl, sinc, r);
+ } else {
+ r = l = mulAdd(samples[0], sinc, l);
+ }
+}
+
+/*
+ * Calculates a single output sample (two stereo frames).
+ *
+ * This function computes both the positive half FIR dot product and
+ * the negative half FIR dot product, accumulates, and then applies the volume.
+ *
+ * This is a locked phase filter (it does not compute the interpolation).
+ *
+ * Use fir() to compute the proper coefficient pointers for a polyphase
+ * filter bank.
+ */
+
+template <int CHANNELS, int STRIDE, typename TC>
+static inline
+void ProcessL(int32_t* const out,
+ int count,
+ const TC* coefsP,
+ const TC* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ int32_t l = 0;
+ int32_t r = 0;
+ do {
+ mac<CHANNELS>(l, r, *coefsP++, sP);
+ sP -= CHANNELS;
+ mac<CHANNELS>(l, r, *coefsN++, sN);
+ sN += CHANNELS;
+ } while (--count > 0);
+ out[0] += 2 * mulRL(0, l, volumeLR[0]); // Note: only use top 16b
+ out[1] += 2 * mulRL(0, r, volumeLR[1]); // Note: only use top 16b
+}
+
+/*
+ * Calculates a single output sample (two stereo frames) interpolating phase.
+ *
+ * This function computes both the positive half FIR dot product and
+ * the negative half FIR dot product, accumulates, and then applies the volume.
+ *
+ * This is an interpolated phase filter.
+ *
+ * Use fir() to compute the proper coefficient pointers for a polyphase
+ * filter bank.
+ */
+
+template <int CHANNELS, int STRIDE, typename TC>
+static inline
+void Process(int32_t* const out,
+ int count,
+ const TC* coefsP,
+ const TC* coefsN,
+ const TC* coefsP1,
+ const TC* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ (void) coefsP1; // suppress unused parameter warning
+ (void) coefsN1;
+ if (sizeof(*coefsP)==4) {
+ lerpP >>= 16; // ensure lerpP is 16b
+ }
+ int32_t l = 0;
+ int32_t r = 0;
+ for (int i = 0; i < count; ++i) {
+ interpolate<CHANNELS>(l, r, coefsP[0], coefsP[count], lerpP, sP);
+ coefsP++;
+ sP -= CHANNELS;
+ interpolate<CHANNELS>(l, r, coefsN[count], coefsN[0], lerpP, sN);
+ coefsN++;
+ sN += CHANNELS;
+ }
+ out[0] += 2 * mulRL(0, l, volumeLR[0]); // Note: only use top 16b
+ out[1] += 2 * mulRL(0, r, volumeLR[1]); // Note: only use top 16b
+}
+
+/*
+ * Calculates a single output sample (two stereo frames) from input sample pointer.
+ *
+ * This sets up the params for the accelerated Process() and ProcessL()
+ * functions to do the appropriate dot products.
+ *
+ * @param out should point to the output buffer with at least enough space for 2 output frames.
+ *
+ * @param phase is the fractional distance between input samples for interpolation:
+ * phase >= 0 && phase < phaseWrapLimit. It can be thought of as a rational fraction
+ * of phase/phaseWrapLimit.
+ *
+ * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases
+ * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift).
+ *
+ * @param coefShift gives the bit alignment of the polyphase index in the phase parameter.
+ *
+ * @param halfNumCoefs is half the number of coefficients per polyphase filter. Since the
+ * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored.
+ *
+ * @param coefs is the polyphase filter bank, starting from polyphase index 0, and ranging up
+ * to and including polyphase index #polyphases. Each polyphase of the filter has half-length halfNumCoefs
+ * (due to symmetry). The total size of the filter bank in coefficients is
+ * (#polyphases+1)*halfNumCoefs.
+ *
+ * The filter bank coefs should be aligned to a minimum of 16 bytes (preferably to a cache line).
+ *
+ * The coefs should be attenuated (to compensate for passband ripple)
+ * if storing back into the native format.
+ *
+ * @param samples are unaligned input samples. The position is in the "middle" of the
+ * sample array with respect to the FIR filter:
+ * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs;
+ * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1.
+ *
+ * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel,
+ * expressed as an S32 integer. A negative value inverts the channel 180 degrees.
+ * The pointer volumeLR should be aligned to a minimum of 8 bytes.
+ * A typical value for volume is 0x1000 to align to a unity gain output in 20.12 format.
+ *
+ * In between calls to fir(), the phase is incremented by phaseIncrement, where
+ * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling.
+ *
+ * The filter polyphase index is given by indexP = phase >> coefShift. Due to
+ * the odd-length symmetric filter, the polyphase index of the negative half depends on
+ * whether interpolation is used.
+ *
+ * The fractional siting between the polyphase indices is given by the bits below coefShift:
+ *
+ * lerpP = phase << 32 - coefShift >> 1; // for 32 bit unsigned phase multiply
+ * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply
+ *
+ * For integer types, this is expressed as:
+ *
+ * lerpP = phase << sizeof(phase)*8 - coefShift
+ * >> (sizeof(phase)-sizeof(*coefs))*8 + 1;
+ *
+ */
+
+template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
+static inline
+void fir(int32_t* const out,
+ const uint32_t phase, const uint32_t phaseWrapLimit,
+ const int coefShift, const int halfNumCoefs, const TC* const coefs,
+ const int16_t* const samples, const int32_t* const volumeLR)
+{
+ // NOTE: be very careful when modifying the code here. register
+ // pressure is very high and a small change might cause the compiler
+ // to generate far less efficient code.
+ // Always sanity check the result with objdump or test-resample.
+
+ if (LOCKED) {
+ // locked polyphase (no interpolation)
+ // Compute the polyphase filter index on the positive and negative side.
+ uint32_t indexP = phase >> coefShift;
+ uint32_t indexN = (phaseWrapLimit - phase) >> coefShift;
+ const TC* coefsP = coefs + indexP*halfNumCoefs;
+ const TC* coefsN = coefs + indexN*halfNumCoefs;
+ const int16_t* sP = samples;
+ const int16_t* sN = samples + CHANNELS;
+
+ // dot product filter.
+ ProcessL<CHANNELS, STRIDE>(out,
+ halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR);
+ } else {
+ // interpolated polyphase
+ // Compute the polyphase filter index on the positive and negative side.
+ uint32_t indexP = phase >> coefShift;
+ uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement.
+ const TC* coefsP = coefs + indexP*halfNumCoefs;
+ const TC* coefsN = coefs + indexN*halfNumCoefs;
+ const TC* coefsP1 = coefsP + halfNumCoefs;
+ const TC* coefsN1 = coefsN + halfNumCoefs;
+ const int16_t* sP = samples;
+ const int16_t* sN = samples + CHANNELS;
+
+ // Interpolation fraction lerpP derived by shifting all the way up and down
+ // to clear the appropriate bits and align to the appropriate level
+ // for the integer multiply. The constants should resolve in compile time.
+ //
+ // The interpolated filter coefficient is derived as follows for the pos/neg half:
+ //
+ // interpolated[P] = index[P]*(1-lerpP) + index[P+1]*lerpP
+ // interpolated[N] = index[N]*lerpP + index[N+1]*(1-lerpP)
+ uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift)
+ >> ((sizeof(phase)-sizeof(*coefs))*8 + 1);
+
+ // on-the-fly interpolated dot product filter
+ Process<CHANNELS, STRIDE>(out,
+ halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
+ }
+}
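+
+// Illustrative sketch (not part of the original sources) of how a caller might
+// drive fir() per output frame; inSampleRate, outSampleRate and the wrap
+// handling below are assumptions for illustration only:
+//
+//   uint32_t phaseIncrement = (uint32_t)((uint64_t)inSampleRate * phaseWrapLimit
+//           / outSampleRate);
+//   for each output frame {
+//       fir<CHANNELS, LOCKED, STRIDE>(out, phase, phaseWrapLimit,
+//               coefShift, halfNumCoefs, coefs, samples, volumeLR);
+//       out += 2;                          // two accumulated (stereo) outputs
+//       phase += phaseIncrement;
+//       while (phase >= phaseWrapLimit) {  // each wrap consumes one input frame
+//           phase -= phaseWrapLimit;
+//           samples += CHANNELS;
+//       }
+//   }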
+
+}; // namespace android
+
+#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/
diff --git a/services/audioflinger/AudioResamplerFirProcessNeon.h b/services/audioflinger/AudioResamplerFirProcessNeon.h
new file mode 100644
index 0000000..f311cef
--- /dev/null
+++ b/services/audioflinger/AudioResamplerFirProcessNeon.h
@@ -0,0 +1,1149 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H
+#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H
+
+namespace android {
+
+// depends on AudioResamplerFirOps.h, AudioResamplerFirProcess.h
+
+#if USE_NEON
+//
+// NEON specializations are enabled for Process() and ProcessL()
+//
+// TODO: Stride 16 and Stride 8 can be combined with one pass stride 8 (if necessary)
+// and looping stride 16 (or vice versa). This has some polyphase coef data alignment
+// issues with S16 coefs. Consider this later.
+
+// Macros to save a mono/stereo accumulator sample in q0 (and q4) as stereo out.
+#define ASSEMBLY_ACCUMULATE_MONO \
+ "vld1.s32 {d2}, [%[vLR]:64] \n"/* (1) load volumes */\
+ "vld1.s32 {d3}, %[out] \n"/* (2) unaligned load the output */\
+ "vpadd.s32 d0, d0, d1 \n"/* (1) add all 4 partial sums */\
+ "vpadd.s32 d0, d0, d0 \n"/* (1+4d) and replicate L/R */\
+ "vqrdmulh.s32 d0, d0, d2 \n"/* (2+3d) apply volume */\
+ "vqadd.s32 d3, d3, d0 \n"/* (1+4d) accumulate result (saturating) */\
+ "vst1.s32 {d3}, %[out] \n"/* (2+2d) store result */
+
+#define ASSEMBLY_ACCUMULATE_STEREO \
+ "vld1.s32 {d2}, [%[vLR]:64] \n"/* (1) load volumes*/\
+ "vld1.s32 {d3}, %[out] \n"/* (2) unaligned load the output*/\
+ "vpadd.s32 d0, d0, d1 \n"/* (1) add all 4 partial sums from q0*/\
+ "vpadd.s32 d8, d8, d9 \n"/* (1) add all 4 partial sums from q4*/\
+ "vpadd.s32 d0, d0, d8 \n"/* (1+4d) combine into L/R*/\
+ "vqrdmulh.s32 d0, d0, d2 \n"/* (2+3d) apply volume*/\
+ "vqadd.s32 d3, d3, d0 \n"/* (1+4d) accumulate result (saturating)*/\
+ "vst1.s32 {d3}, %[out] \n"/* (2+2d)store result*/
+
+template <>
+inline void ProcessL<1, 16>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0
+
+ "1: \n"
+
+ "vld1.16 {q2}, [%[sP]] \n"// (2+0d) load 8 16-bits mono samples
+ "vld1.16 {q3}, [%[sN]]! \n"// (2) load 8 16-bits mono samples
+ "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs
+
+ "vrev64.16 q2, q2 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4
+
+ // reordering the vmlal to do d6, d7 before d4, d5 is slower(?)
+ "vmlal.s16 q0, d4, d17 \n"// (1+0d) multiply (reversed) samples by coef
+ "vmlal.s16 q0, d5, d16 \n"// (1) multiply (reversed) samples by coef
+ "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples
+ "vmlal.s16 q0, d7, d21 \n"// (1) multiply neg samples
+
+ // moving these ARM instructions before neon above seems to be slower
+ "subs %[count], %[count], #8 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q10"
+ );
+}
+
+template <>
+inline void ProcessL<2, 16>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// (1) acc_L = 0
+ "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0
+
+ "1: \n"
+
+ "vld2.16 {q2, q3}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples
+ "vld2.16 {q5, q6}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples
+ "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs
+
+ "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive
+ "vrev64.16 q3, q3 \n"// (0 combines+) reverse right positive
+
+ "vmlal.s16 q0, d4, d17 \n"// (1) multiply (reversed) samples left
+ "vmlal.s16 q0, d5, d16 \n"// (1) multiply (reversed) samples left
+ "vmlal.s16 q4, d6, d17 \n"// (1) multiply (reversed) samples right
+ "vmlal.s16 q4, d7, d16 \n"// (1) multiply (reversed) samples right
+ "vmlal.s16 q0, d10, d20 \n"// (1) multiply samples left
+ "vmlal.s16 q0, d11, d21 \n"// (1) multiply samples left
+ "vmlal.s16 q4, d12, d20 \n"// (1) multiply samples right
+ "vmlal.s16 q4, d13, d21 \n"// (1) multiply samples right
+
+ // moving these ARM before neon seems to be slower
+ "subs %[count], %[count], #8 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #32 \n"// (0) move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q4", "q5", "q6",
+ "q8", "q10"
+ );
+}
+
+template <>
+inline void Process<1, 16>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* coefsP1,
+ const int16_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase S32 Q15
+ "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0
+
+ "1: \n"
+
+ "vld1.16 {q2}, [%[sP]] \n"// (2+0d) load 8 16-bits mono samples
+ "vld1.16 {q3}, [%[sN]]! \n"// (2) load 8 16-bits mono samples
+ "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {q9}, [%[coefsP1]:128]! \n"// (1) load 8 16-bits coefs for interpolation
+ "vld1.16 {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {q11}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs for interpolation
+
+ "vsub.s16 q9, q9, q8 \n"// (1) interpolate (step1) 1st set of coefs
+ "vsub.s16 q11, q11, q10 \n"// (1) interpolate (step1) 2nd set of coets
+
+ "vqrdmulh.s16 q9, q9, d2[0] \n"// (2) interpolate (step2) 1st set of coefs
+ "vqrdmulh.s16 q11, q11, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs
+
+ "vrev64.16 q2, q2 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4
+
+ "vadd.s16 q8, q8, q9 \n"// (1+2d) interpolate (step3) 1st set
+ "vadd.s16 q10, q10, q11 \n"// (1+1d) interpolate (step3) 2nd set
+
+ // reordering the vmlal to do d6, d7 before d4, d5 is slower(?)
+ "vmlal.s16 q0, d4, d17 \n"// (1+0d) multiply reversed samples by coef
+ "vmlal.s16 q0, d5, d16 \n"// (1) multiply reversed samples by coef
+ "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples
+ "vmlal.s16 q0, d7, d21 \n"// (1) multiply neg samples
+
+ // moving these ARM instructions before neon above seems to be slower
+ "subs %[count], %[count], #8 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11"
+ );
+}
+
+template <>
+inline void Process<2, 16>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* coefsP1,
+ const int16_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase
+ "veor q0, q0, q0 \n"// (1) acc_L = 0
+ "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0
+
+ "1: \n"
+
+ "vld2.16 {q2, q3}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples
+ "vld2.16 {q5, q6}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples
+ "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {q9}, [%[coefsP1]:128]! \n"// (1) load 8 16-bits coefs for interpolation
+ "vld1.16 {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {q11}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs for interpolation
+
+ "vsub.s16 q9, q9, q8 \n"// (1) interpolate (step1) 1st set of coefs
+ "vsub.s16 q11, q11, q10 \n"// (1) interpolate (step1) 2nd set of coets
+
+ "vqrdmulh.s16 q9, q9, d2[0] \n"// (2) interpolate (step2) 1st set of coefs
+ "vqrdmulh.s16 q11, q11, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs
+
+ "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive
+ "vrev64.16 q3, q3 \n"// (1) reverse 8 frames of the right positive
+
+ "vadd.s16 q8, q8, q9 \n"// (1+1d) interpolate (step3) 1st set
+ "vadd.s16 q10, q10, q11 \n"// (1+1d) interpolate (step3) 2nd set
+
+ "vmlal.s16 q0, d4, d17 \n"// (1) multiply reversed samples left
+ "vmlal.s16 q0, d5, d16 \n"// (1) multiply reversed samples left
+ "vmlal.s16 q4, d6, d17 \n"// (1) multiply reversed samples right
+ "vmlal.s16 q4, d7, d16 \n"// (1) multiply reversed samples right
+ "vmlal.s16 q0, d10, d20 \n"// (1) multiply samples left
+ "vmlal.s16 q0, d11, d21 \n"// (1) multiply samples left
+ "vmlal.s16 q4, d12, d20 \n"// (1) multiply samples right
+ "vmlal.s16 q4, d13, d21 \n"// (1) multiply samples right
+
+ // moving these ARM before neon seems to be slower
+ "subs %[count], %[count], #8 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #32 \n"// (0) move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q4", "q5", "q6",
+ "q8", "q9", "q10", "q11"
+ );
+}
+
+template <>
+inline void ProcessL<1, 16>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld1.16 {q2}, [%[sP]] \n"// load 8 16-bits mono samples
+ "vld1.16 {q3}, [%[sN]]! \n"// load 8 16-bits mono samples
+ "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
+
+ "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side
+
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q13, q13, q14 \n"// accumulate result
+ "vadd.s32 q0, q0, q15 \n"// accumulate result
+ "vadd.s32 q0, q0, q13 \n"// accumulate result
+
+ "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples
+ "subs %[count], %[count], #8 \n"// update loop counter
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+}
+
+template <>
+inline void ProcessL<2, 16>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// result, initialize to 0
+ "veor q4, q4, q4 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld2.16 {q2, q3}, [%[sP]] \n"// load 4 16-bits stereo samples
+ "vld2.16 {q5, q6}, [%[sN]]! \n"// load 4 16-bits stereo samples
+ "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q10, q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs
+
+ "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side
+ "vrev64.16 q3, q3 \n"// reverse 8 frames of the positive side
+
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d10, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d11, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q13, q13, q14 \n"// accumulate result
+ "vadd.s32 q0, q0, q15 \n"// (+1) accumulate result
+ "vadd.s32 q0, q0, q13 \n"// (+1) accumulate result
+
+ "vshll.s16 q12, d6, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d7, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d12, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d13, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q4, q4, q12 \n"// accumulate result
+ "vadd.s32 q13, q13, q14 \n"// accumulate result
+ "vadd.s32 q4, q4, q15 \n"// (+1) accumulate result
+ "vadd.s32 q4, q4, q13 \n"// (+1) accumulate result
+
+ "subs %[count], %[count], #8 \n"// update loop counter
+ "sub %[sP], %[sP], #32 \n"// move pointer to next set of samples
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q4", "q5", "q6",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+}
+
+template <>
+inline void Process<1, 16>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int32_t* coefsP1,
+ const int32_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase
+ "veor q0, q0, q0 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld1.16 {q2}, [%[sP]] \n"// load 8 16-bits mono samples
+ "vld1.16 {q3}, [%[sN]]! \n"// load 8 16-bits mono samples
+ "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q14, q15}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
+
+ "vsub.s32 q12, q12, q8 \n"// interpolate (step1)
+ "vsub.s32 q13, q13, q9 \n"// interpolate (step1)
+ "vsub.s32 q14, q14, q10 \n"// interpolate (step1)
+ "vsub.s32 q15, q15, q11 \n"// interpolate (step1)
+
+ "vqrdmulh.s32 q12, q12, d2[0] \n"// interpolate (step2)
+ "vqrdmulh.s32 q13, q13, d2[0] \n"// interpolate (step2)
+ "vqrdmulh.s32 q14, q14, d2[0] \n"// interpolate (step2)
+ "vqrdmulh.s32 q15, q15, d2[0] \n"// interpolate (step2)
+
+ "vadd.s32 q8, q8, q12 \n"// interpolate (step3)
+ "vadd.s32 q9, q9, q13 \n"// interpolate (step3)
+ "vadd.s32 q10, q10, q14 \n"// interpolate (step3)
+ "vadd.s32 q11, q11, q15 \n"// interpolate (step3)
+
+ "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side
+
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q13, q13, q14 \n"// accumulate result
+ "vadd.s32 q0, q0, q15 \n"// accumulate result
+ "vadd.s32 q0, q0, q13 \n"// accumulate result
+
+ "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples
+ "subs %[count], %[count], #8 \n"// update loop counter
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+}
+
+template <>
+inline void Process<2, 16>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int32_t* coefsP1,
+ const int32_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 16;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase
+ "veor q0, q0, q0 \n"// result, initialize to 0
+ "veor q4, q4, q4 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld2.16 {q2, q3}, [%[sP]] \n"// load 4 16-bits stereo samples
+ "vld2.16 {q5, q6}, [%[sN]]! \n"// load 4 16-bits stereo samples
+ "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs
+ "vld1.32 {q14, q15}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs
+
+ "vsub.s32 q12, q12, q8 \n"// interpolate (step1)
+ "vsub.s32 q13, q13, q9 \n"// interpolate (step1)
+ "vsub.s32 q14, q14, q10 \n"// interpolate (step1)
+ "vsub.s32 q15, q15, q11 \n"// interpolate (step1)
+
+ "vqrdmulh.s32 q12, q12, d2[0] \n"// interpolate (step2)
+ "vqrdmulh.s32 q13, q13, d2[0] \n"// interpolate (step2)
+ "vqrdmulh.s32 q14, q14, d2[0] \n"// interpolate (step2)
+ "vqrdmulh.s32 q15, q15, d2[0] \n"// interpolate (step2)
+
+ "vadd.s32 q8, q8, q12 \n"// interpolate (step3)
+ "vadd.s32 q9, q9, q13 \n"// interpolate (step3)
+ "vadd.s32 q10, q10, q14 \n"// interpolate (step3)
+ "vadd.s32 q11, q11, q15 \n"// interpolate (step3)
+
+ "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side
+ "vrev64.16 q3, q3 \n"// reverse 8 frames of the positive side
+
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d10, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d11, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q13, q13, q14 \n"// accumulate result
+ "vadd.s32 q0, q0, q15 \n"// (+1) accumulate result
+ "vadd.s32 q0, q0, q13 \n"// (+1) accumulate result
+
+ "vshll.s16 q12, d6, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d7, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d12, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d13, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q4, q4, q12 \n"// accumulate result
+ "vadd.s32 q13, q13, q14 \n"// accumulate result
+ "vadd.s32 q4, q4, q15 \n"// (+1) accumulate result
+ "vadd.s32 q4, q4, q13 \n"// (+1) accumulate result
+
+ "subs %[count], %[count], #8 \n"// update loop counter
+ "sub %[sP], %[sP], #32 \n"// move pointer to next set of samples
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q4", "q5", "q6",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+}
+
+template <>
+inline void ProcessL<1, 8>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0
+
+ "1: \n"
+
+ "vld1.16 {d4}, [%[sP]] \n"// (2+0d) load 4 16-bits mono samples
+ "vld1.16 {d6}, [%[sN]]! \n"// (2) load 4 16-bits mono samples
+ "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 4 16-bits coefs
+ "vld1.16 {d20}, [%[coefsN0]:64]! \n"// (1) load 4 16-bits coefs
+
+ "vrev64.16 d4, d4 \n"// (1) reversed s3, s2, s1, s0, s7, s6, s5, s4
+
+ // reordering the vmlal to do d6, d7 before d4, d5 is slower(?)
+ "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples by coef
+ "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples
+
+ // moving these ARM instructions before neon above seems to be slower
+ "subs %[count], %[count], #4 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #8 \n"// (0) move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q10"
+ );
+}
+
+template <>
+inline void ProcessL<2, 8>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// (1) acc_L = 0
+ "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0
+
+ "1: \n"
+
+ "vld2.16 {d4, d5}, [%[sP]] \n"// (2+0d) load 8 16-bits stereo samples
+ "vld2.16 {d6, d7}, [%[sN]]! \n"// (2) load 8 16-bits stereo samples
+ "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {d20}, [%[coefsN0]:64]! \n"// (1) load 8 16-bits coefs
+
+ "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive
+
+ "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples left
+ "vmlal.s16 q4, d5, d16 \n"// (1) multiply (reversed) samples right
+ "vmlal.s16 q0, d6, d20 \n"// (1) multiply samples left
+ "vmlal.s16 q4, d7, d20 \n"// (1) multiply samples right
+
+ // moving these ARM before neon seems to be slower
+ "subs %[count], %[count], #4 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q4", "q5", "q6",
+ "q8", "q10"
+ );
+}
+
+template <>
+inline void Process<1, 8>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* coefsP1,
+ const int16_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase S32 Q15
+ "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0
+
+ "1: \n"
+
+ "vld1.16 {d4}, [%[sP]] \n"// (2+0d) load 4 16-bits mono samples
+ "vld1.16 {d6}, [%[sN]]! \n"// (2) load 4 16-bits mono samples
+ "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 4 16-bits coefs
+ "vld1.16 {d17}, [%[coefsP1]:64]! \n"// (1) load 4 16-bits coefs for interpolation
+ "vld1.16 {d20}, [%[coefsN1]:64]! \n"// (1) load 4 16-bits coefs
+ "vld1.16 {d21}, [%[coefsN0]:64]! \n"// (1) load 4 16-bits coefs for interpolation
+
+ "vsub.s16 d17, d17, d16 \n"// (1) interpolate (step1) 1st set of coefs
+ "vsub.s16 d21, d21, d20 \n"// (1) interpolate (step1) 2nd set of coets
+
+ "vqrdmulh.s16 d17, d17, d2[0] \n"// (2) interpolate (step2) 1st set of coefs
+ "vqrdmulh.s16 d21, d21, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs
+
+ "vrev64.16 d4, d4 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4
+
+ "vadd.s16 d16, d16, d17 \n"// (1+2d) interpolate (step3) 1st set
+ "vadd.s16 d20, d20, d21 \n"// (1+1d) interpolate (step3) 2nd set
+
+ // reordering the vmlal to do d6, d7 before d4, d5 is slower(?)
+ "vmlal.s16 q0, d4, d16 \n"// (1+0d) multiply (reversed) samples by coef
+ "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples
+
+ // moving these ARM instructions before neon above seems to be slower
+ "subs %[count], %[count], #4 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11"
+ );
+}
+
+template <>
+inline void Process<2, 8>(int32_t* const out,
+ int count,
+ const int16_t* coefsP,
+ const int16_t* coefsN,
+ const int16_t* coefsP1,
+ const int16_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase
+ "veor q0, q0, q0 \n"// (1) acc_L = 0
+ "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0
+
+ "1: \n"
+
+ "vld2.16 {d4, d5}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples
+ "vld2.16 {d6, d7}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples
+ "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {d17}, [%[coefsP1]:64]! \n"// (1) load 8 16-bits coefs for interpolation
+ "vld1.16 {d20}, [%[coefsN1]:64]! \n"// (1) load 8 16-bits coefs
+ "vld1.16 {d21}, [%[coefsN0]:64]! \n"// (1) load 8 16-bits coefs for interpolation
+
+ "vsub.s16 d17, d17, d16 \n"// (1) interpolate (step1) 1st set of coefs
+ "vsub.s16 d21, d21, d20 \n"// (1) interpolate (step1) 2nd set of coets
+
+ "vqrdmulh.s16 d17, d17, d2[0] \n"// (2) interpolate (step2) 1st set of coefs
+ "vqrdmulh.s16 d21, d21, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs
+
+ "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive
+
+ "vadd.s16 d16, d16, d17 \n"// (1+1d) interpolate (step3) 1st set
+ "vadd.s16 d20, d20, d21 \n"// (1+1d) interpolate (step3) 2nd set
+
+ "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples left
+ "vmlal.s16 q4, d5, d16 \n"// (1) multiply (reversed) samples right
+ "vmlal.s16 q0, d6, d20 \n"// (1) multiply samples left
+ "vmlal.s16 q4, d7, d20 \n"// (1) multiply samples right
+
+ // moving these ARM before neon seems to be slower
+ "subs %[count], %[count], #4 \n"// (1) update loop counter
+ "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples
+
+ // sP used after branch (warning)
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q4", "q5", "q6",
+ "q8", "q9", "q10", "q11"
+ );
+}
+
+template <>
+inline void ProcessL<1, 8>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld1.16 {d4}, [%[sP]] \n"// load 4 16-bits mono samples
+ "vld1.16 {d6}, [%[sN]]! \n"// load 4 16-bits mono samples
+ "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q10}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs
+
+ "vrev64.16 d4, d4 \n"// reverse 2 frames of the positive side
+
+ "vshll.s16 q12, d4, #15 \n"// (stall) extend samples to 31 bits
+ "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q0, q0, q14 \n"// (stall) accumulate result
+
+ "subs %[count], %[count], #4 \n"// update loop counter
+ "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11",
+ "q12", "q14"
+ );
+}
+
+template <>
+inline void ProcessL<2, 8>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int16_t* sP,
+ const int16_t* sN,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "veor q0, q0, q0 \n"// result, initialize to 0
+ "veor q4, q4, q4 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld2.16 {d4, d5}, [%[sP]] \n"// load 4 16-bits stereo samples
+ "vld2.16 {d6, d7}, [%[sN]]! \n"// load 4 16-bits stereo samples
+ "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q10}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs
+
+ "vrev64.16 q2, q2 \n"// reverse 2 frames of the positive side
+
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits
+
+ "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by coef
+ "vqrdmulh.s32 q15, q15, q10 \n"// multiply samples by coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q4, q4, q13 \n"// accumulate result
+ "vadd.s32 q0, q0, q14 \n"// accumulate result
+ "vadd.s32 q4, q4, q15 \n"// accumulate result
+
+ "subs %[count], %[count], #4 \n"// update loop counter
+ "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsN0] "+r" (coefsN),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3", "q4",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+}
+
+template <>
+inline void Process<1, 8>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int32_t* coefsP1,
+ const int32_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 1; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase
+ "veor q0, q0, q0 \n"// result, initialize to 0
+
+ "1: \n"
+
+ "vld1.16 {d4}, [%[sP]] \n"// load 4 16-bits mono samples
+ "vld1.16 {d6}, [%[sN]]! \n"// load 4 16-bits mono samples
+ "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q9}, [%[coefsP1]:128]! \n"// load 4 32-bits coefs for interpolation
+ "vld1.32 {q10}, [%[coefsN1]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs for interpolation
+
+ "vrev64.16 d4, d4 \n"// reverse 2 frames of the positive side
+
+ "vsub.s32 q9, q9, q8 \n"// interpolate (step1) 1st set of coefs
+ "vsub.s32 q11, q11, q10 \n"// interpolate (step1) 2nd set of coets
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q9, q9, d2[0] \n"// interpolate (step2) 1st set of coefs
+ "vqrdmulh.s32 q11, q11, d2[0] \n"// interpolate (step2) 2nd set of coefs
+ "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits
+
+ "vadd.s32 q8, q8, q9 \n"// interpolate (step3) 1st set
+ "vadd.s32 q10, q10, q11 \n"// interpolate (step4) 2nd set
+
+ "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q0, q0, q14 \n"// accumulate result
+
+ "subs %[count], %[count], #4 \n"// update loop counter
+ "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_MONO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN0] "+r" (coefsN),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3",
+ "q8", "q9", "q10", "q11",
+ "q12", "q14"
+ );
+}
+
+template <>
+inline
+void Process<2, 8>(int32_t* const out,
+ int count,
+ const int32_t* coefsP,
+ const int32_t* coefsN,
+ const int32_t* coefsP1,
+ const int32_t* coefsN1,
+ const int16_t* sP,
+ const int16_t* sN,
+ uint32_t lerpP,
+ const int32_t* const volumeLR)
+{
+ const int CHANNELS = 2; // template specialization does not preserve params
+ const int STRIDE = 8;
+ sP -= CHANNELS*((STRIDE>>1)-1);
+ asm (
+ "vmov.32 d2[0], %[lerpP] \n"// load the positive phase
+ "veor q0, q0, q0 \n"// result, initialize to 0
+ "veor q4, q4, q4 \n"// result, initialize to 0
+
+ "1: \n"
+ "vld2.16 {d4, d5}, [%[sP]] \n"// load 4 16-bits stereo samples
+ "vld2.16 {d6, d7}, [%[sN]]! \n"// load 4 16-bits stereo samples
+ "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q9}, [%[coefsP1]:128]! \n"// load 4 32-bits coefs for interpolation
+ "vld1.32 {q10}, [%[coefsN1]:128]! \n"// load 4 32-bits coefs
+ "vld1.32 {q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs for interpolation
+
+ "vrev64.16 q2, q2 \n"// (reversed) 2 frames of the positive side
+
+ "vsub.s32 q9, q9, q8 \n"// interpolate (step1) 1st set of coefs
+ "vsub.s32 q11, q11, q10 \n"// interpolate (step1) 2nd set of coets
+ "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits
+
+ "vqrdmulh.s32 q9, q9, d2[0] \n"// interpolate (step2) 1st set of coefs
+ "vqrdmulh.s32 q11, q11, d2[1] \n"// interpolate (step3) 2nd set of coefs
+ "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits
+ "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits
+
+ "vadd.s32 q8, q8, q9 \n"// interpolate (step3) 1st set
+ "vadd.s32 q10, q10, q11 \n"// interpolate (step4) 2nd set
+
+ "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef
+ "vqrdmulh.s32 q15, q15, q10 \n"// multiply samples by interpolated coef
+
+ "vadd.s32 q0, q0, q12 \n"// accumulate result
+ "vadd.s32 q4, q4, q13 \n"// accumulate result
+ "vadd.s32 q0, q0, q14 \n"// accumulate result
+ "vadd.s32 q4, q4, q15 \n"// accumulate result
+
+ "subs %[count], %[count], #4 \n"// update loop counter
+ "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples
+
+ "bne 1b \n"// loop
+
+ ASSEMBLY_ACCUMULATE_STEREO
+
+ : [out] "=Uv" (out[0]),
+ [count] "+r" (count),
+ [coefsP0] "+r" (coefsP),
+ [coefsP1] "+r" (coefsP1),
+ [coefsN0] "+r" (coefsN),
+ [coefsN1] "+r" (coefsN1),
+ [sP] "+r" (sP),
+ [sN] "+r" (sN)
+ : [lerpP] "r" (lerpP),
+ [vLR] "r" (volumeLR)
+ : "cc", "memory",
+ "q0", "q1", "q2", "q3", "q4",
+ "q8", "q9", "q10", "q11",
+ "q12", "q13", "q14", "q15"
+ );
+}
+
+#endif //USE_NEON
+
+}; // namespace android
+
+#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H*/
diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp
index 35b8575..01b90a8 100644
--- a/services/audioflinger/Threads.cpp
+++ b/services/audioflinger/Threads.cpp
@@ -3047,15 +3047,8 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::MixerThread::prepareTrac
(mMixerStatusIgnoringFastTracks == MIXER_TRACKS_READY)) {
minFrames = desiredFrames;
}
- // It's not safe to call framesReady() for a static buffer track, so assume it's ready
- size_t framesReady;
- if (track->sharedBuffer() == 0) {
- framesReady = track->framesReady();
- } else if (track->isStopped()) {
- framesReady = 0;
- } else {
- framesReady = 1;
- }
+
+ size_t framesReady = track->framesReady();
if ((framesReady >= minFrames) && track->isReady() &&
!track->isPaused() && !track->isTerminated())
{
diff --git a/services/audioflinger/Tracks.cpp b/services/audioflinger/Tracks.cpp
index 53196c8..a5b9ac5 100644
--- a/services/audioflinger/Tracks.cpp
+++ b/services/audioflinger/Tracks.cpp
@@ -564,7 +564,7 @@ size_t AudioFlinger::PlaybackThread::Track::framesReleased() const
// Don't call for fast tracks; the framesReady() could result in priority inversion
bool AudioFlinger::PlaybackThread::Track::isReady() const {
- if (mFillingUpStatus != FS_FILLING || isStopped() || isPausing()) {
+ if (mFillingUpStatus != FS_FILLING || isStopped() || isPausing() || isStopping()) {
return true;
}
diff --git a/services/audioflinger/test-resample.cpp b/services/audioflinger/test-resample.cpp
index 0d00a0f..66fcd90 100644
--- a/services/audioflinger/test-resample.cpp
+++ b/services/audioflinger/test-resample.cpp
@@ -33,8 +33,9 @@ using namespace android;
bool gVerbose = false;
static int usage(const char* name) {
- fprintf(stderr,"Usage: %s [-p] [-h] [-v] [-s] [-q {dq|lq|mq|hq|vhq}] [-i input-sample-rate] "
- "[-o output-sample-rate] [<input-file>] <output-file>\n", name);
+ fprintf(stderr,"Usage: %s [-p] [-h] [-v] [-s] [-q {dq|lq|mq|hq|vhq|dlq|dmq|dhq}]"
+ " [-i input-sample-rate] [-o output-sample-rate] [<input-file>]"
+ " <output-file>\n", name);
fprintf(stderr," -p enable profiling\n");
fprintf(stderr," -h create wav file\n");
fprintf(stderr," -v verbose : log buffer provider calls\n");
@@ -45,6 +46,9 @@ static int usage(const char* name) {
fprintf(stderr," mq : medium quality\n");
fprintf(stderr," hq : high quality\n");
fprintf(stderr," vhq : very high quality\n");
+ fprintf(stderr," dlq : dynamic low quality\n");
+ fprintf(stderr," dmq : dynamic medium quality\n");
+ fprintf(stderr," dhq : dynamic high quality\n");
fprintf(stderr," -i input file sample rate (ignored if input file is specified)\n");
fprintf(stderr," -o output file sample rate\n");
return -1;
@@ -53,7 +57,8 @@ static int usage(const char* name) {
int main(int argc, char* argv[]) {
const char* const progname = argv[0];
- bool profiling = false;
+ bool profileResample = false;
+ bool profileFilter = false;
bool writeHeader = false;
int channels = 1;
int input_freq = 0;
@@ -61,10 +66,13 @@ int main(int argc, char* argv[]) {
AudioResampler::src_quality quality = AudioResampler::DEFAULT_QUALITY;
int ch;
- while ((ch = getopt(argc, argv, "phvsq:i:o:")) != -1) {
+ while ((ch = getopt(argc, argv, "pfhvsq:i:o:")) != -1) {
switch (ch) {
case 'p':
- profiling = true;
+ profileResample = true;
+ break;
+ case 'f':
+ profileFilter = true;
break;
case 'h':
writeHeader = true;
@@ -86,6 +94,12 @@ int main(int argc, char* argv[]) {
quality = AudioResampler::HIGH_QUALITY;
else if (!strcmp(optarg, "vhq"))
quality = AudioResampler::VERY_HIGH_QUALITY;
+ else if (!strcmp(optarg, "dlq"))
+ quality = AudioResampler::DYN_LOW_QUALITY;
+ else if (!strcmp(optarg, "dmq"))
+ quality = AudioResampler::DYN_MED_QUALITY;
+ else if (!strcmp(optarg, "dhq"))
+ quality = AudioResampler::DYN_HIGH_QUALITY;
else {
usage(progname);
return -1;
@@ -137,6 +151,8 @@ int main(int argc, char* argv[]) {
channels = info.channels;
input_freq = info.samplerate;
} else {
+ // data for testing is exactly (input sampling rate/1000)/2 seconds
+ // so a 44.1 kHz input is 22.05 seconds
double k = 1000; // Hz / s
double time = (input_freq / 2) / k;
size_t input_frames = size_t(input_freq * time);
@@ -148,7 +164,7 @@ int main(int argc, char* argv[]) {
double y = sin(M_PI * k * t * t);
int16_t yi = floor(y * 32767.0 + 0.5);
for (size_t j=0 ; j<(size_t)channels ; j++) {
- in[i*channels + j] = yi / (1+j);
+ in[i*channels + j] = yi / (1+j); // right ch. 1/2 left ch.
}
}
}
@@ -170,6 +186,7 @@ int main(int argc, char* argv[]) {
}
virtual status_t getNextBuffer(Buffer* buffer,
int64_t pts = kInvalidPTS) {
+ (void)pts; // suppress warning
size_t requestedFrames = buffer->frameCount;
if (requestedFrames > mNumFrames - mNextFrame) {
buffer->frameCount = mNumFrames - mNextFrame;
@@ -202,6 +219,11 @@ int main(int argc, char* argv[]) {
mNextFrame += buffer->frameCount;
mUnrel -= buffer->frameCount;
}
+ buffer->frameCount = 0;
+ buffer->i16 = NULL;
+ }
+ void reset() {
+ mNextFrame = 0;
}
} provider(input_vaddr, input_size, channels);
@@ -212,37 +234,110 @@ int main(int argc, char* argv[]) {
size_t output_size = 2 * 4 * ((int64_t) input_frames * output_freq) / input_freq;
output_size &= ~7; // always stereo, 32-bits
- void* output_vaddr = malloc(output_size);
-
- if (profiling) {
+ if (profileFilter) {
+ // Check how fast sample rate changes are that require filter changes.
+ // Each sample rate used here corresponds to a downsampling ratio and
+ // differs from the previous rate by more than 10%, so every change
+ // forces the filter to be recomputed.
+ //
+ // On fast devices, filters should be generated between 0.1ms - 1ms.
+ // (single threaded).
AudioResampler* resampler = AudioResampler::create(16, channels,
- output_freq, quality);
-
- size_t out_frames = output_size/8;
- resampler->setSampleRate(input_freq);
- resampler->setVolume(0x1000, 0x1000);
-
- memset(output_vaddr, 0, output_size);
+ 8000, quality);
+ int looplimit = 100;
timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
- resampler->resample((int*) output_vaddr, out_frames, &provider);
- resampler->resample((int*) output_vaddr, out_frames, &provider);
- resampler->resample((int*) output_vaddr, out_frames, &provider);
- resampler->resample((int*) output_vaddr, out_frames, &provider);
+ for (int i = 0; i < looplimit; ++i) {
+ resampler->setSampleRate(9000);
+ resampler->setSampleRate(12000);
+ resampler->setSampleRate(20000);
+ resampler->setSampleRate(30000);
+ }
clock_gettime(CLOCK_MONOTONIC, &end);
int64_t start_ns = start.tv_sec * 1000000000LL + start.tv_nsec;
int64_t end_ns = end.tv_sec * 1000000000LL + end.tv_nsec;
- int64_t time = (end_ns - start_ns)/4;
- printf("%f Mspl/s\n", out_frames/(time/1e9)/1e6);
+ int64_t time = end_ns - start_ns;
+ printf("%.2f sample rate changes with filter calculation/sec\n",
+ looplimit * 4 / (time / 1e9));
+ // Check how fast sample rate changes are without filter changes.
+ // This should be very fast, probably 0.1us - 1us per sample rate
+ // change.
+ resampler->setSampleRate(1000);
+ looplimit = 1000;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (int i = 0; i < looplimit; ++i) {
+ resampler->setSampleRate(1000+i);
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ start_ns = start.tv_sec * 1000000000LL + start.tv_nsec;
+ end_ns = end.tv_sec * 1000000000LL + end.tv_nsec;
+ time = end_ns - start_ns;
+ printf("%.2f sample rate changes without filter calculation/sec\n",
+ looplimit / (time / 1e9));
+ resampler->reset();
delete resampler;
}
+ void* output_vaddr = malloc(output_size);
AudioResampler* resampler = AudioResampler::create(16, channels,
output_freq, quality);
size_t out_frames = output_size/8;
+
+ /* set volume precision to 12 bits, so the volume scale is 1<<12.
+ * This means the "integer" part fits in the Q19.12 precision
+ * representation of output int32_t.
+ *
+ * Generally 0 < volumePrecision <= 14 (due to the limits of
+ * int16_t values for Volume). volumePrecision cannot be 0 due
+ * to rounding and shifts.
+ */
+ const int volumePrecision = 12; // in bits
+
resampler->setSampleRate(input_freq);
- resampler->setVolume(0x1000, 0x1000);
+ resampler->setVolume(1 << volumePrecision, 1 << volumePrecision);
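+
+ // Illustrative note (not part of the original change): with volumePrecision
+ // of 12 the resampled output is in Q19.12, so a full-scale int16_t input of
+ // 32767 accumulates to roughly 32767 << 12 before the conversion back to
+ // int16_t further below.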
+
+ if (profileResample) {
+ /*
+ * For profiling on mobile devices, upon experimentation
+ * it is better to run a few trials with a shorter loop limit,
+ * and take the minimum time.
+ *
+ * Long tests can cause CPU temperature to build up and thermal throttling
+ * to reduce CPU frequency.
+ *
+ * For frequency checks (index=0, or 1, etc.):
+ * "cat /sys/devices/system/cpu/cpu${index}/cpufreq/scaling_*_freq"
+ *
+ * For temperature checks (index=0, or 1, etc.):
+ * "cat /sys/class/thermal/thermal_zone${index}/temp"
+ *
+ * Another way to avoid thermal throttling is to fix the CPU frequency
+ * at a lower level which prevents excessive temperatures.
+ */
+ const int trials = 4;
+ const int looplimit = 4;
+ timespec start, end;
+ int64_t time;
+
+ for (int n = 0; n < trials; ++n) {
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (int i = 0; i < looplimit; ++i) {
+ resampler->resample((int*) output_vaddr, out_frames, &provider);
+ provider.reset(); // during benchmarking reset only the provider
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ int64_t start_ns = start.tv_sec * 1000000000LL + start.tv_nsec;
+ int64_t end_ns = end.tv_sec * 1000000000LL + end.tv_nsec;
+ int64_t diff_ns = end_ns - start_ns;
+ if (n == 0 || diff_ns < time) {
+ time = diff_ns; // save the best out of our trials.
+ }
+ }
+ // Mfrms/s is "Millions of output frames per second".
+ printf("quality: %d channels: %d msec: %lld Mfrms/s: %.2lf\n",
+ quality, channels, time/1000000, out_frames * looplimit / (time / 1e9) / 1e6);
+ resampler->reset();
+ }
memset(output_vaddr, 0, output_size);
if (gVerbose) {
@@ -256,15 +351,31 @@ int main(int argc, char* argv[]) {
if (gVerbose) {
printf("reset() complete\n");
}
+ delete resampler;
+ resampler = NULL;
- // down-mix (we just truncate and keep the left channel)
+ // mono takes left channel only
+ // stereo right channel is half amplitude of stereo left channel (due to input creation)
int32_t* out = (int32_t*) output_vaddr;
int16_t* convert = (int16_t*) malloc(out_frames * channels * sizeof(int16_t));
+
+ // round half towards zero and saturate at int16 (non-dithered)
+ const int roundVal = (1<<(volumePrecision-1)) - 1; // volumePrecision > 0
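+ // Illustrative note (not part of the original change): with volumePrecision 12,
+ // an output value of 2048 (+0.5) converts to 0 and -2048 (-0.5) also converts
+ // to 0, i.e. halves are rounded towards zero.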
+
for (size_t i = 0; i < out_frames; i++) {
- for (int j=0 ; j<channels ; j++) {
- int32_t s = out[i * 2 + j] >> 12;
- if (s > 32767) s = 32767;
- else if (s < -32768) s = -32768;
+ for (int j = 0; j < channels; j++) {
+ int32_t s = out[i * 2 + j] + roundVal; // add offset here
+ if (s < 0) {
+ s = (s + 1) >> volumePrecision; // round to 0
+ if (s < -32768) {
+ s = -32768;
+ }
+ } else {
+ s = s >> volumePrecision;
+ if (s > 32767) {
+ s = 32767;
+ }
+ }
convert[i * channels + j] = int16_t(s);
}
}
diff --git a/services/camera/libcameraservice/api2/CameraDeviceClient.cpp b/services/camera/libcameraservice/api2/CameraDeviceClient.cpp
index 1cdf8dc..187220e 100644
--- a/services/camera/libcameraservice/api2/CameraDeviceClient.cpp
+++ b/services/camera/libcameraservice/api2/CameraDeviceClient.cpp
@@ -635,26 +635,56 @@ status_t CameraDeviceClient::getRotationTransformLocked(int32_t* transform) {
return INVALID_OPERATION;
}
+ camera_metadata_ro_entry_t entryFacing = staticInfo.find(ANDROID_LENS_FACING);
+ if (entryFacing.count == 0) {
+ ALOGE("%s: Camera %d: Can't find android.lens.facing in "
+ "static metadata!", __FUNCTION__, mCameraId);
+ return INVALID_OPERATION;
+ }
+
int32_t& flags = *transform;
+ bool mirror = (entryFacing.data.u8[0] == ANDROID_LENS_FACING_FRONT);
int orientation = entry.data.i32[0];
- switch (orientation) {
- case 0:
- flags = 0;
- break;
- case 90:
- flags = NATIVE_WINDOW_TRANSFORM_ROT_90;
- break;
- case 180:
- flags = NATIVE_WINDOW_TRANSFORM_ROT_180;
- break;
- case 270:
- flags = NATIVE_WINDOW_TRANSFORM_ROT_270;
- break;
- default:
- ALOGE("%s: Invalid HAL android.sensor.orientation value: %d",
- __FUNCTION__, orientation);
- return INVALID_OPERATION;
+ if (!mirror) {
+ switch (orientation) {
+ case 0:
+ flags = 0;
+ break;
+ case 90:
+ flags = NATIVE_WINDOW_TRANSFORM_ROT_90;
+ break;
+ case 180:
+ flags = NATIVE_WINDOW_TRANSFORM_ROT_180;
+ break;
+ case 270:
+ flags = NATIVE_WINDOW_TRANSFORM_ROT_270;
+ break;
+ default:
+ ALOGE("%s: Invalid HAL android.sensor.orientation value: %d",
+ __FUNCTION__, orientation);
+ return INVALID_OPERATION;
+ }
+ } else {
+ switch (orientation) {
+ case 0:
+ flags = HAL_TRANSFORM_FLIP_H;
+ break;
+ case 90:
+ flags = HAL_TRANSFORM_FLIP_H | HAL_TRANSFORM_ROT_90;
+ break;
+ case 180:
+ flags = HAL_TRANSFORM_FLIP_V;
+ break;
+ case 270:
+ flags = HAL_TRANSFORM_FLIP_V | HAL_TRANSFORM_ROT_90;
+ break;
+ default:
+ ALOGE("%s: Invalid HAL android.sensor.orientation value: %d",
+ __FUNCTION__, orientation);
+ return INVALID_OPERATION;
+ }
+
}
/**
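The branch added above mirrors front-facing sensors (a horizontal or vertical flip composed with the same 90-degree rotation steps), while back-facing sensors get the plain rotations. An equivalent table-driven formulation, shown only as a sketch; the numeric values are the standard Android graphics transform bits (FLIP_H = 0x1, FLIP_V = 0x2, ROT_90 = 0x4), not code from this patch:

    #include <cstdint>

    // index = orientation / 90; values match the two switch statements above
    static bool orientationToTransform(int orientation, bool mirror, int32_t* flags) {
        static const int32_t kBack[4]  = { 0x0, 0x4, 0x3, 0x7 };  // 0, ROT_90, ROT_180, ROT_270
        static const int32_t kFront[4] = { 0x1, 0x5, 0x2, 0x6 };  // FLIP_H, FLIP_H|ROT_90, FLIP_V, FLIP_V|ROT_90
        if (orientation < 0 || orientation > 270 || orientation % 90 != 0) {
            return false;  // invalid android.sensor.orientation value
        }
        *flags = (mirror ? kFront : kBack)[orientation / 90];
        return true;
    }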
diff --git a/tools/resampler_tools/fir.cpp b/tools/resampler_tools/fir.cpp
index cc3d509..3d6a74d 100644
--- a/tools/resampler_tools/fir.cpp
+++ b/tools/resampler_tools/fir.cpp
@@ -20,15 +20,25 @@
#include <stdlib.h>
#include <string.h>
-static double sinc(double x) {
+static inline double sinc(double x) {
if (fabs(x) == 0.0f) return 1.0f;
return sin(x) / x;
}
-static double sqr(double x) {
+static inline double sqr(double x) {
return x*x;
}
+static inline int64_t toint(double x, int64_t maxval) {
+ int64_t v;
+
+ v = static_cast<int64_t>(floor(x * maxval + 0.5));
+ if (v >= maxval) {
+        return maxval - 1; // saturate at the largest representable value
+ }
+ return v;
+}
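toint() quantizes a coefficient to a signed value with nc-1 fractional bits, rounding to nearest and clamping at the positive limit (since +1.0 is not representable). A quick standalone check with a local copy of the helper, using nc = 16:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    static inline int64_t toint(double x, int64_t maxval) {
        int64_t v = static_cast<int64_t>(floor(x * maxval + 0.5));
        return (v >= maxval) ? maxval - 1 : v;
    }

    int main() {
        // nc = 16 -> maxval = 1 << 15
        printf("%lld\n", (long long) toint( 0.5,  1LL << 15));  // 16384
        printf("%lld\n", (long long) toint( 1.0,  1LL << 15));  // clamped to 32767
        printf("%lld\n", (long long) toint(-0.25, 1LL << 15));  // -8192
        return 0;
    }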
+
static double I0(double x) {
// from the Numerical Recipes in C p. 237
double ax,ans,y;
@@ -54,11 +64,12 @@ static double kaiser(int k, int N, double beta) {
return I0(beta * sqrt(1.0 - sqr((2.0*k)/N - 1.0))) / I0(beta);
}
-
static void usage(char* name) {
fprintf(stderr,
- "usage: %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings] [-f {float|fixed}] [-b beta] [-v dBFS] [-l lerp]\n"
- " %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings] [-f {float|fixed}] [-b beta] [-v dBFS] -p M/N\n"
+ "usage: %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings]"
+ " [-f {float|fixed|fixed16}] [-b beta] [-v dBFS] [-l lerp]\n"
+ " %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings]"
+ " [-f {float|fixed|fixed16}] [-b beta] [-v dBFS] -p M/N\n"
" -h this help message\n"
" -d debug, print comma-separated coefficient table\n"
" -p generate poly-phase filter coefficients, with sample increment M/N\n"
@@ -66,6 +77,7 @@ static void usage(char* name) {
" -c cut-off frequency (20478)\n"
" -n number of zero-crossings on one side (8)\n"
" -l number of lerping bits (4)\n"
+ " -m number of polyphases (related to -l, default 16)\n"
" -f output format, can be fixed-point or floating-point (fixed)\n"
" -b kaiser window parameter beta (7.865 [-80dB])\n"
" -v attenuation in dBFS (0)\n",
@@ -77,8 +89,7 @@ static void usage(char* name) {
int main(int argc, char** argv)
{
// nc is the number of bits to store the coefficients
- const int nc = 32;
-
+ int nc = 32;
bool polyphase = false;
unsigned int polyM = 160;
unsigned int polyN = 147;
@@ -88,7 +99,6 @@ int main(int argc, char** argv)
double atten = 1;
int format = 0;
-
// in order to keep the errors associated with the linear
// interpolation of the coefficients below the quantization error
// we must satisfy:
@@ -104,7 +114,6 @@ int main(int argc, char** argv)
// Smith, J.O. Digital Audio Resampling Home Page
// https://ccrma.stanford.edu/~jos/resample/, 2011-03-29
//
- int nz = 4;
// | 0.1102*(A - 8.7) A > 50
// beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50
@@ -123,7 +132,6 @@ int main(int argc, char** argv)
// 100 dB 10.056
double beta = 7.865;
-
// 2*nzc = (A - 8) / (2.285 * dw)
// with dw the transition width = 2*pi*dF/Fs
//
@@ -148,8 +156,9 @@ int main(int argc, char** argv)
// nzc = 20
//
+ int M = 1 << 4; // number of phases for interpolation
int ch;
- while ((ch = getopt(argc, argv, ":hds:c:n:f:l:b:p:v:")) != -1) {
+ while ((ch = getopt(argc, argv, ":hds:c:n:f:l:m:b:p:v:z:")) != -1) {
switch (ch) {
case 'd':
debug = true;
@@ -169,13 +178,26 @@ int main(int argc, char** argv)
case 'n':
nzc = atoi(optarg);
break;
+ case 'm':
+ M = atoi(optarg);
+ break;
case 'l':
- nz = atoi(optarg);
+ M = 1 << atoi(optarg);
break;
case 'f':
- if (!strcmp(optarg,"fixed")) format = 0;
- else if (!strcmp(optarg,"float")) format = 1;
- else usage(argv[0]);
+ if (!strcmp(optarg, "fixed")) {
+ format = 0;
+ }
+ else if (!strcmp(optarg, "fixed16")) {
+ format = 0;
+ nc = 16;
+ }
+ else if (!strcmp(optarg, "float")) {
+ format = 1;
+ }
+ else {
+ usage(argv[0]);
+ }
break;
case 'b':
beta = atof(optarg);
@@ -193,11 +215,14 @@ int main(int argc, char** argv)
// cut off frequency ratio Fc/Fs
double Fcr = Fc / Fs;
-
// total number of coefficients (one side)
- const int M = (1 << nz);
+
const int N = M * nzc;
+    // lerp between adjacent phases (most useful if M is a power of 2)
+
+    int nz = 0; // recalculate nz as the number of bits needed to represent M-1 (log2(M) for a power of 2)
+ for (int i = M-1 ; i; i>>=1, nz++);
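The loop above recovers nz as the number of bits needed to represent M-1, i.e. log2(M) when M is a power of two, so the default M = 1 << 4 reproduces the old nz = 4. A tiny standalone check:

    #include <cstdio>

    int main() {
        const int M = 16;  // default number of phases (1 << 4)
        int nz = 0;
        for (int i = M - 1; i; i >>= 1, nz++)
            ;              // i takes the values 15, 7, 3, 1
        printf("M=%d nz=%d\n", M, nz);  // prints M=16 nz=4
        return 0;
    }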
// generate the right half of the filter
if (!debug) {
printf("// cmd-line: ");
@@ -207,7 +232,7 @@ int main(int argc, char** argv)
printf("\n");
if (!polyphase) {
printf("const int32_t RESAMPLE_FIR_SIZE = %d;\n", N);
- printf("const int32_t RESAMPLE_FIR_LERP_INT_BITS = %d;\n", nz);
+ printf("const int32_t RESAMPLE_FIR_INT_PHASES = %d;\n", M);
printf("const int32_t RESAMPLE_FIR_NUM_COEF = %d;\n", nzc);
} else {
printf("const int32_t RESAMPLE_FIR_SIZE = %d;\n", 2*nzc*polyN);
@@ -224,7 +249,7 @@ int main(int argc, char** argv)
for (int i=0 ; i<=M ; i++) { // an extra set of coefs for interpolation
for (int j=0 ; j<nzc ; j++) {
int ix = j*M + i;
- double x = (2.0 * M_PI * ix * Fcr) / (1 << nz);
+ double x = (2.0 * M_PI * ix * Fcr) / M;
double y = kaiser(ix+N, 2*N, beta) * sinc(x) * 2.0 * Fcr;
y *= atten;
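Each tap above is a Kaiser-windowed sinc sample: with Fcr = Fc/Fs and phase index ix = j*M + i, the coefficient is y = 2*Fcr * sinc(2*pi*Fcr*ix/M) * kaiser(ix+N, 2N, beta), scaled by the attenuation. A standalone sketch of the same computation; the sample rate, cut-off, beta and tap indices are assumed defaults, and I0() here is a short power series rather than the Numerical Recipes approximation used in this file:

    #include <cmath>
    #include <cstdio>

    static double sincf(double x) { return x == 0.0 ? 1.0 : sin(x) / x; }
    static double sqr(double x)   { return x * x; }

    static double I0(double x) {  // modified Bessel function of the first kind, order 0
        double sum = 1.0, term = 1.0;
        for (int k = 1; k < 32; k++) {
            term *= sqr(x / (2.0 * k));  // term_k = (x/2)^(2k) / (k!)^2
            sum  += term;
        }
        return sum;
    }

    static double kaiser(int k, int N, double beta) {
        return I0(beta * sqrt(1.0 - sqr((2.0 * k) / N - 1.0))) / I0(beta);
    }

    int main() {
        const double Fs = 48000.0, Fc = 20478.0, beta = 7.865, atten = 1.0;
        const int M = 16, nzc = 8, N = M * nzc;
        const double Fcr = Fc / Fs;
        const int i = 3, j = 2, ix = j * M + i;  // arbitrary interpolation/tap indices
        double x = (2.0 * M_PI * ix * Fcr) / M;
        double y = kaiser(ix + N, 2 * N, beta) * sincf(x) * 2.0 * Fcr * atten;
        printf("coefficient[%d] = %.9g\n", ix, y);
        return 0;
    }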
@@ -232,11 +257,13 @@ int main(int argc, char** argv)
if (j == 0)
printf("\n ");
}
-
if (!format) {
- int64_t yi = floor(y * ((1ULL<<(nc-1))) + 0.5);
- if (yi >= (1LL<<(nc-1))) yi = (1LL<<(nc-1))-1;
- printf("0x%08x, ", int32_t(yi));
+ int64_t yi = toint(y, 1ULL<<(nc-1));
+ if (nc > 16) {
+ printf("0x%08x, ", int32_t(yi));
+ } else {
+ printf("0x%04x, ", int32_t(yi)&0xffff);
+ }
} else {
printf("%.9g%s ", y, debug ? "," : "f,");
}
@@ -254,9 +281,12 @@ int main(int argc, char** argv)
double y = kaiser(i+N, 2*N, beta) * sinc(x) * 2.0 * Fcr;;
y *= atten;
if (!format) {
- int64_t yi = floor(y * ((1ULL<<(nc-1))) + 0.5);
- if (yi >= (1LL<<(nc-1))) yi = (1LL<<(nc-1))-1;
- printf("0x%08x", int32_t(yi));
+ int64_t yi = toint(y, 1ULL<<(nc-1));
+ if (nc > 16) {
+ printf("0x%08x, ", int32_t(yi));
+ } else {
+ printf("0x%04x, ", int32_t(yi)&0xffff);
+ }
} else {
printf("%.9g%s", y, debug ? "" : "f");
}
@@ -277,5 +307,3 @@ int main(int argc, char** argv)
}
// http://www.csee.umbc.edu/help/sound/AFsp-V2R1/html/audio/ResampAudio.html
-
-