diff options
43 files changed, 3734 insertions, 250 deletions
diff --git a/cmds/screenrecord/screenrecord.cpp b/cmds/screenrecord/screenrecord.cpp index 61f83e3..b6f150c 100644 --- a/cmds/screenrecord/screenrecord.cpp +++ b/cmds/screenrecord/screenrecord.cpp @@ -45,6 +45,7 @@ #include <signal.h> #include <getopt.h> #include <sys/wait.h> +#include <termios.h> #include <assert.h> #include "screenrecord.h" @@ -61,6 +62,7 @@ static const uint32_t kFallbackHeight = 720; // Command-line parameters. static bool gVerbose = false; // chatty on stdout static bool gRotate = false; // rotate 90 degrees +static bool gRawOutput = false; // generate raw H.264 byte stream output static bool gSizeSpecified = false; // was size explicitly requested? static bool gWantInfoScreen = false; // do we want initial info screen? static bool gWantFrameTime = false; // do we want times on each frame? @@ -298,10 +300,12 @@ static status_t prepareVirtualDisplay(const DisplayInfo& mainDpyInfo, * input frames are coming from the virtual display as fast as SurfaceFlinger * wants to send them. * + * Exactly one of muxer or rawFp must be non-null. + * * The muxer must *not* have been started before calling. */ static status_t runEncoder(const sp<MediaCodec>& encoder, - const sp<MediaMuxer>& muxer, const sp<IBinder>& mainDpy, + const sp<MediaMuxer>& muxer, FILE* rawFp, const sp<IBinder>& mainDpy, const sp<IBinder>& virtualDpy, uint8_t orientation) { static int kTimeout = 250000; // be responsive on signal status_t err; @@ -311,6 +315,8 @@ static status_t runEncoder(const sp<MediaCodec>& encoder, int64_t endWhenNsec = startWhenNsec + seconds_to_nanoseconds(gTimeLimitSec); DisplayInfo mainDpyInfo; + assert((rawFp == NULL && muxer != NULL) || (rawFp != NULL && muxer == NULL)); + Vector<sp<ABuffer> > buffers; err = encoder->getOutputBuffers(&buffers); if (err != NO_ERROR) { @@ -342,15 +348,16 @@ static status_t runEncoder(const sp<MediaCodec>& encoder, case NO_ERROR: // got a buffer if ((flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) != 0) { - // ignore this -- we passed the CSD into MediaMuxer when - // we got the format change notification - ALOGV("Got codec config buffer (%u bytes); ignoring", size); - size = 0; + ALOGV("Got codec config buffer (%u bytes)", size); + if (muxer != NULL) { + // ignore this -- we passed the CSD into MediaMuxer when + // we got the format change notification + size = 0; + } } if (size != 0) { ALOGV("Got data in buffer %d, size=%d, pts=%lld", bufIndex, size, ptsUsec); - assert(trackIdx != -1); { // scope ATRACE_NAME("orientation"); @@ -379,14 +386,23 @@ static status_t runEncoder(const sp<MediaCodec>& encoder, ptsUsec = systemTime(SYSTEM_TIME_MONOTONIC) / 1000; } - // The MediaMuxer docs are unclear, but it appears that we - // need to pass either the full set of BufferInfo flags, or - // (flags & BUFFER_FLAG_SYNCFRAME). - // - // If this blocks for too long we could drop frames. We may - // want to queue these up and do them on a different thread. - { // scope + if (muxer == NULL) { + fwrite(buffers[bufIndex]->data(), 1, size, rawFp); + // Flush the data immediately in case we're streaming. + // We don't want to do this if all we've written is + // the SPS/PPS data because mplayer gets confused. + if ((flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) == 0) { + fflush(rawFp); + } + } else { + // The MediaMuxer docs are unclear, but it appears that we + // need to pass either the full set of BufferInfo flags, or + // (flags & BUFFER_FLAG_SYNCFRAME). + // + // If this blocks for too long we could drop frames. We may + // want to queue these up and do them on a different thread. ATRACE_NAME("write sample"); + assert(trackIdx != -1); err = muxer->writeSampleData(buffers[bufIndex], trackIdx, ptsUsec, flags); if (err != NO_ERROR) { @@ -418,12 +434,14 @@ static status_t runEncoder(const sp<MediaCodec>& encoder, ALOGV("Encoder format changed"); sp<AMessage> newFormat; encoder->getOutputFormat(&newFormat); - trackIdx = muxer->addTrack(newFormat); - ALOGV("Starting muxer"); - err = muxer->start(); - if (err != NO_ERROR) { - fprintf(stderr, "Unable to start muxer (err=%d)\n", err); - return err; + if (muxer != NULL) { + trackIdx = muxer->addTrack(newFormat); + ALOGV("Starting muxer"); + err = muxer->start(); + if (err != NO_ERROR) { + fprintf(stderr, "Unable to start muxer (err=%d)\n", err); + return err; + } } } break; @@ -457,6 +475,44 @@ static status_t runEncoder(const sp<MediaCodec>& encoder, } /* + * Raw H.264 byte stream output requested. Send the output to stdout + * if desired. If the output is a tty, reconfigure it to avoid the + * CRLF line termination that we see with "adb shell" commands. + */ +static FILE* prepareRawOutput(const char* fileName) { + FILE* rawFp = NULL; + + if (strcmp(fileName, "-") == 0) { + if (gVerbose) { + fprintf(stderr, "ERROR: verbose output and '-' not compatible"); + return NULL; + } + rawFp = stdout; + } else { + rawFp = fopen(fileName, "w"); + if (rawFp == NULL) { + fprintf(stderr, "fopen raw failed: %s\n", strerror(errno)); + return NULL; + } + } + + int fd = fileno(rawFp); + if (isatty(fd)) { + // best effort -- reconfigure tty for "raw" + ALOGD("raw video output to tty (fd=%d)", fd); + struct termios term; + if (tcgetattr(fd, &term) == 0) { + cfmakeraw(&term); + if (tcsetattr(fd, TCSANOW, &term) == 0) { + ALOGD("tty successfully configured for raw"); + } + } + } + + return rawFp; +} + +/* * Main "do work" method. * * Configures codec, muxer, and virtual display, then starts moving bits @@ -558,16 +614,26 @@ static status_t recordScreen(const char* fileName) { return err; } - // Configure muxer. We have to wait for the CSD blob from the encoder - // before we can start it. - sp<MediaMuxer> muxer = new MediaMuxer(fileName, - MediaMuxer::OUTPUT_FORMAT_MPEG_4); - if (gRotate) { - muxer->setOrientationHint(90); // TODO: does this do anything? + sp<MediaMuxer> muxer = NULL; + FILE* rawFp = NULL; + if (gRawOutput) { + rawFp = prepareRawOutput(fileName); + if (rawFp == NULL) { + encoder->release(); + return -1; + } + } else { + // Configure muxer. We have to wait for the CSD blob from the encoder + // before we can start it. + muxer = new MediaMuxer(fileName, MediaMuxer::OUTPUT_FORMAT_MPEG_4); + if (gRotate) { + muxer->setOrientationHint(90); // TODO: does this do anything? + } } // Main encoder loop. - err = runEncoder(encoder, muxer, mainDpy, dpy, mainDpyInfo.orientation); + err = runEncoder(encoder, muxer, rawFp, mainDpy, dpy, + mainDpyInfo.orientation); if (err != NO_ERROR) { fprintf(stderr, "Encoder failed (err=%d)\n", err); // fall through to cleanup @@ -584,9 +650,13 @@ static status_t recordScreen(const char* fileName) { overlay->stop(); } encoder->stop(); - // If we don't stop muxer explicitly, i.e. let the destructor run, - // it may hang (b/11050628). - muxer->stop(); + if (muxer != NULL) { + // If we don't stop muxer explicitly, i.e. let the destructor run, + // it may hang (b/11050628). + muxer->stop(); + } else if (rawFp != stdout) { + fclose(rawFp); + } encoder->release(); return err; @@ -753,6 +823,7 @@ int main(int argc, char* const argv[]) { { "show-frame-time", no_argument, NULL, 'f' }, { "bugreport", no_argument, NULL, 'u' }, { "rotate", no_argument, NULL, 'r' }, + { "raw", no_argument, NULL, 'w' }, { NULL, 0, NULL, 0 } }; @@ -818,6 +889,10 @@ int main(int argc, char* const argv[]) { // experimental feature gRotate = true; break; + case 'w': + // experimental feature + gRawOutput = true; + break; default: if (ic != '?') { fprintf(stderr, "getopt_long returned unexpected value 0x%x\n", ic); @@ -831,17 +906,19 @@ int main(int argc, char* const argv[]) { return 2; } - // MediaMuxer tries to create the file in the constructor, but we don't - // learn about the failure until muxer.start(), which returns a generic - // error code without logging anything. We attempt to create the file - // now for better diagnostics. const char* fileName = argv[optind]; - int fd = open(fileName, O_CREAT | O_RDWR, 0644); - if (fd < 0) { - fprintf(stderr, "Unable to open '%s': %s\n", fileName, strerror(errno)); - return 1; + if (!gRawOutput) { + // MediaMuxer tries to create the file in the constructor, but we don't + // learn about the failure until muxer.start(), which returns a generic + // error code without logging anything. We attempt to create the file + // now for better diagnostics. + int fd = open(fileName, O_CREAT | O_RDWR, 0644); + if (fd < 0) { + fprintf(stderr, "Unable to open '%s': %s\n", fileName, strerror(errno)); + return 1; + } + close(fd); } - close(fd); status_t err = recordScreen(fileName); if (err == NO_ERROR) { diff --git a/cmds/screenrecord/screenrecord.h b/cmds/screenrecord/screenrecord.h index 95e8a68..9b058c2 100644 --- a/cmds/screenrecord/screenrecord.h +++ b/cmds/screenrecord/screenrecord.h @@ -18,6 +18,6 @@ #define SCREENRECORD_SCREENRECORD_H #define kVersionMajor 1 -#define kVersionMinor 1 +#define kVersionMinor 2 #endif /*SCREENRECORD_SCREENRECORD_H*/ diff --git a/include/media/AudioBufferProvider.h b/include/media/AudioBufferProvider.h index ef392f0..7be449c 100644 --- a/include/media/AudioBufferProvider.h +++ b/include/media/AudioBufferProvider.h @@ -61,6 +61,17 @@ public: // buffer->frameCount 0 virtual status_t getNextBuffer(Buffer* buffer, int64_t pts = kInvalidPTS) = 0; + // Release (a portion of) the buffer previously obtained by getNextBuffer(). + // It is permissible to call releaseBuffer() multiple times per getNextBuffer(). + // On entry: + // buffer->frameCount number of frames to release, must be <= number of frames + // obtained but not yet released + // buffer->raw unused + // On return: + // buffer->frameCount 0; implementation MUST set to zero + // buffer->raw undefined; implementation is PERMITTED to set to any value, + // so if caller needs to continue using this buffer it must + // keep track of the pointer itself virtual void releaseBuffer(Buffer* buffer) = 0; }; diff --git a/include/media/AudioRecord.h b/include/media/AudioRecord.h index b192bd3..0439cb0 100644 --- a/include/media/AudioRecord.h +++ b/include/media/AudioRecord.h @@ -412,6 +412,7 @@ private: bool mPaused; // whether thread is requested to pause at next loop entry bool mPausedInt; // whether thread internally requests pause nsecs_t mPausedNs; // if mPausedInt then associated timeout, otherwise ignored + bool mIgnoreNextPausedInt; // whether to ignore next mPausedInt request }; // body of AudioRecordThread::threadLoop() @@ -422,7 +423,7 @@ private: // NS_INACTIVE inactive so don't run again until re-started // NS_NEVER never again static const nsecs_t NS_WHENEVER = -1, NS_INACTIVE = -2, NS_NEVER = -3; - nsecs_t processAudioBuffer(const sp<AudioRecordThread>& thread); + nsecs_t processAudioBuffer(); // caller must hold lock on mLock for all _l methods status_t openRecord_l(size_t epoch); diff --git a/include/media/AudioSystem.h b/include/media/AudioSystem.h index b96b8a1..ca9aaf7 100644 --- a/include/media/AudioSystem.h +++ b/include/media/AudioSystem.h @@ -67,20 +67,24 @@ public: // returns true in *state if tracks are active on the specified stream or have been active // in the past inPastMs milliseconds - static status_t isStreamActive(audio_stream_type_t stream, bool *state, uint32_t inPastMs = 0); + static status_t isStreamActive(audio_stream_type_t stream, bool *state, uint32_t inPastMs); // returns true in *state if tracks are active for what qualifies as remote playback // on the specified stream or have been active in the past inPastMs milliseconds. Remote // playback isn't mutually exclusive with local playback. static status_t isStreamActiveRemotely(audio_stream_type_t stream, bool *state, - uint32_t inPastMs = 0); + uint32_t inPastMs); // returns true in *state if a recorder is currently recording with the specified source static status_t isSourceActive(audio_source_t source, bool *state); // set/get audio hardware parameters. The function accepts a list of parameters // key value pairs in the form: key1=value1;key2=value2;... // Some keys are reserved for standard parameters (See AudioParameter class). + // The versions with audio_io_handle_t are intended for internal media framework use only. static status_t setParameters(audio_io_handle_t ioHandle, const String8& keyValuePairs); static String8 getParameters(audio_io_handle_t ioHandle, const String8& keys); + // The versions without audio_io_handle_t are intended for JNI. + static status_t setParameters(const String8& keyValuePairs); + static String8 getParameters(const String8& keys); static void setErrorCallback(audio_error_callback cb); @@ -90,12 +94,14 @@ public: static float linearToLog(int volume); static int logToLinear(float volume); + // Returned samplingRate and frameCount output values are guaranteed + // to be non-zero if status == NO_ERROR static status_t getOutputSamplingRate(uint32_t* samplingRate, - audio_stream_type_t stream = AUDIO_STREAM_DEFAULT); + audio_stream_type_t stream); static status_t getOutputFrameCount(size_t* frameCount, - audio_stream_type_t stream = AUDIO_STREAM_DEFAULT); + audio_stream_type_t stream); static status_t getOutputLatency(uint32_t* latency, - audio_stream_type_t stream = AUDIO_STREAM_DEFAULT); + audio_stream_type_t stream); static status_t getSamplingRate(audio_io_handle_t output, audio_stream_type_t streamType, uint32_t* samplingRate); @@ -202,16 +208,16 @@ public: const audio_offload_info_t *offloadInfo = NULL); static status_t startOutput(audio_io_handle_t output, audio_stream_type_t stream, - int session = 0); + int session); static status_t stopOutput(audio_io_handle_t output, audio_stream_type_t stream, - int session = 0); + int session); static void releaseOutput(audio_io_handle_t output); static audio_io_handle_t getInput(audio_source_t inputSource, - uint32_t samplingRate = 0, - audio_format_t format = AUDIO_FORMAT_DEFAULT, - audio_channel_mask_t channelMask = AUDIO_CHANNEL_IN_MONO, - int sessionId = 0); + uint32_t samplingRate, + audio_format_t format, + audio_channel_mask_t channelMask, + int sessionId); static status_t startInput(audio_io_handle_t input); static status_t stopInput(audio_io_handle_t input); static void releaseInput(audio_io_handle_t input); diff --git a/include/media/AudioTrack.h b/include/media/AudioTrack.h index 2717549..96135a6 100644 --- a/include/media/AudioTrack.h +++ b/include/media/AudioTrack.h @@ -123,6 +123,8 @@ public: * - NO_ERROR: successful operation * - NO_INIT: audio server or audio hardware not initialized * - BAD_VALUE: unsupported configuration + * frameCount is guaranteed to be non-zero if status is NO_ERROR, + * and is undefined otherwise. */ static status_t getMinFrameCount(size_t* frameCount, @@ -566,7 +568,7 @@ public: uint32_t getUnderrunFrames() const; /* Get the flags */ - audio_output_flags_t getFlags() const { return mFlags; } + audio_output_flags_t getFlags() const { AutoMutex _l(mLock); return mFlags; } /* Set parameters - only possible when using direct output */ status_t setParameters(const String8& keyValuePairs); @@ -626,9 +628,9 @@ protected: // NS_INACTIVE inactive so don't run again until re-started // NS_NEVER never again static const nsecs_t NS_WHENEVER = -1, NS_INACTIVE = -2, NS_NEVER = -3; - nsecs_t processAudioBuffer(const sp<AudioTrackThread>& thread); - status_t processStreamEnd(int32_t waitCount); + nsecs_t processAudioBuffer(); + bool isOffloaded() const; // caller must hold lock on mLock for all _l methods @@ -650,7 +652,7 @@ protected: // FIXME enum is faster than strcmp() for parameter 'from' status_t restoreTrack_l(const char *from); - bool isOffloaded() const + bool isOffloaded_l() const { return (mFlags & AUDIO_OUTPUT_FLAG_COMPRESS_OFFLOAD) != 0; } // Next 3 fields may be changed if IAudioTrack is re-created, but always != 0 @@ -720,6 +722,9 @@ protected: uint32_t mUpdatePeriod; // in frames, zero means no EVENT_NEW_POS audio_output_flags_t mFlags; + // const after set(), except for bits AUDIO_OUTPUT_FLAG_FAST and AUDIO_OUTPUT_FLAG_OFFLOAD. + // mLock must be held to read or write those bits reliably. + int mSessionId; int mAuxEffectId; diff --git a/include/media/stagefright/MetaData.h b/include/media/stagefright/MetaData.h index de3fc36..3a87474 100644 --- a/include/media/stagefright/MetaData.h +++ b/include/media/stagefright/MetaData.h @@ -134,6 +134,7 @@ enum { kKeyRequiresSecureBuffers = 'secu', // bool (int32_t) kKeyIsADTS = 'adts', // bool (int32_t) + kKeyAACAOT = 'aaot', // int32_t // If a MediaBuffer's data represents (at least partially) encrypted // data, the following fields aid in decryption. diff --git a/libvideoeditor/lvpp/VideoEditorPreviewController.cpp b/libvideoeditor/lvpp/VideoEditorPreviewController.cpp index 149c4ea..c3cd3d0 100755 --- a/libvideoeditor/lvpp/VideoEditorPreviewController.cpp +++ b/libvideoeditor/lvpp/VideoEditorPreviewController.cpp @@ -1248,7 +1248,7 @@ void VideoEditorPreviewController::notify( case MEDIA_SET_VIDEO_SIZE: ALOGV("MEDIA_SET_VIDEO_SIZE; New video size %d x %d", ext1, ext2); break; - case 0xAAAAAAAA: + case static_cast<int>(0xAAAAAAAA): ALOGV("VIDEO PLAYBACK ALMOST over, prepare next player"); // Select next player and prepare it // If there is a clip after this one @@ -1268,7 +1268,7 @@ void VideoEditorPreviewController::notify( } } break; - case 0xBBBBBBBB: + case static_cast<int>(0xBBBBBBBB): { ALOGV("VIDEO PLAYBACK, Update Overlay"); int overlayIndex = ext2; diff --git a/libvideoeditor/osal/inc/M4OSA_Error.h b/libvideoeditor/osal/inc/M4OSA_Error.h index 4d59529..75c3177 100755 --- a/libvideoeditor/osal/inc/M4OSA_Error.h +++ b/libvideoeditor/osal/inc/M4OSA_Error.h @@ -57,7 +57,7 @@ typedef M4OSA_UInt32 M4OSA_ERR; * @arg coreID: (IN) [M4OSA_UInt32] CoreID to put in the error code * @arg errorID: (IN) [M4OSA_UInt32] ErrorID to put in the error code*/ #define M4OSA_ERR_CREATE(severity, coreID, errorID)\ - (M4OSA_Int32)((((M4OSA_UInt32)severity)<<30)+((((M4OSA_UInt32)coreID)&0x003FFF)<<16)+(((M4OSA_UInt32)errorID)&0x00FFFF)) + (M4OSA_UInt32)((((M4OSA_UInt32)severity)<<30)+((((M4OSA_UInt32)coreID)&0x003FFF)<<16)+(((M4OSA_UInt32)errorID)&0x00FFFF)) /** This macro extracts the 3 fields from the error: * @arg error: (IN) [M4OSA_ERR] Error code diff --git a/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp b/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp index 3c8915a..99cf9ec 100755 --- a/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp +++ b/libvideoeditor/vss/stagefrightshells/src/VideoEditor3gpReader.cpp @@ -776,16 +776,16 @@ M4OSA_ERR VideoEditor3gpReader_setOption(M4OSA_Context context, case M4READER_kOptionID_SetOsaFileReaderFctsPtr: break; - case M4READER_3GP_kOptionID_AudioOnly: + case static_cast<M4OSA_OptionID>(M4READER_3GP_kOptionID_AudioOnly): break; - case M4READER_3GP_kOptionID_VideoOnly: + case static_cast<M4OSA_OptionID>(M4READER_3GP_kOptionID_VideoOnly): break; - case M4READER_3GP_kOptionID_FastOpenMode: + case static_cast<M4OSA_OptionID>(M4READER_3GP_kOptionID_FastOpenMode): break; - case M4READER_kOptionID_MaxMetadataSize: + case static_cast<M4OSA_OptionID>(M4READER_kOptionID_MaxMetadataSize): break; default: diff --git a/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp b/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp index 9b35d07..e4c7ea1 100755 --- a/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp +++ b/libvideoeditor/vss/stagefrightshells/src/VideoEditorAudioDecoder.cpp @@ -809,7 +809,7 @@ M4OSA_ERR VideoEditorAudioDecoder_setOption(M4AD_Context pContext, pDecoderContext = (VideoEditorAudioDecoder_Context*)pContext; switch( optionID ) { - case M4AD_kOptionID_UserParam: + case static_cast<M4OSA_UInt32>(M4AD_kOptionID_UserParam): ALOGV("VideoEditorAudioDecodersetOption UserParam is not supported"); err = M4ERR_NOT_IMPLEMENTED; break; diff --git a/media/libeffects/loudness/EffectLoudnessEnhancer.cpp b/media/libeffects/loudness/EffectLoudnessEnhancer.cpp index 91ed677..3c2b320 100644 --- a/media/libeffects/loudness/EffectLoudnessEnhancer.cpp +++ b/media/libeffects/loudness/EffectLoudnessEnhancer.cpp @@ -453,13 +453,13 @@ const struct effect_interface_s gLEInterface = { // This is the only symbol that needs to be exported __attribute__ ((visibility ("default"))) audio_effect_library_t AUDIO_EFFECT_LIBRARY_INFO_SYM = { - tag : AUDIO_EFFECT_LIBRARY_TAG, - version : EFFECT_LIBRARY_API_VERSION, - name : "Loudness Enhancer Library", - implementor : "The Android Open Source Project", - create_effect : LELib_Create, - release_effect : LELib_Release, - get_descriptor : LELib_GetDescriptor, + .tag = AUDIO_EFFECT_LIBRARY_TAG, + .version = EFFECT_LIBRARY_API_VERSION, + .name = "Loudness Enhancer Library", + .implementor = "The Android Open Source Project", + .create_effect = LELib_Create, + .release_effect = LELib_Release, + .get_descriptor = LELib_GetDescriptor, }; }; // extern "C" diff --git a/media/libeffects/proxy/EffectProxy.cpp b/media/libeffects/proxy/EffectProxy.cpp index dd4ad08..e6faaaf 100644 --- a/media/libeffects/proxy/EffectProxy.cpp +++ b/media/libeffects/proxy/EffectProxy.cpp @@ -329,11 +329,11 @@ int Effect_getDescriptor(effect_handle_t self, __attribute__ ((visibility ("default"))) audio_effect_library_t AUDIO_EFFECT_LIBRARY_INFO_SYM = { - tag : AUDIO_EFFECT_LIBRARY_TAG, - version : EFFECT_LIBRARY_API_VERSION, - name : "Effect Proxy", - implementor : "AOSP", - create_effect : android::EffectProxyCreate, - release_effect : android::EffectProxyRelease, - get_descriptor : android::EffectProxyGetDescriptor, + .tag = AUDIO_EFFECT_LIBRARY_TAG, + .version = EFFECT_LIBRARY_API_VERSION, + .name = "Effect Proxy", + .implementor = "AOSP", + .create_effect = android::EffectProxyCreate, + .release_effect = android::EffectProxyRelease, + .get_descriptor = android::EffectProxyGetDescriptor, }; diff --git a/media/libmedia/AudioRecord.cpp b/media/libmedia/AudioRecord.cpp index df99147..e39a475 100644 --- a/media/libmedia/AudioRecord.cpp +++ b/media/libmedia/AudioRecord.cpp @@ -692,7 +692,7 @@ ssize_t AudioRecord::read(void* buffer, size_t userSize) // ------------------------------------------------------------------------- -nsecs_t AudioRecord::processAudioBuffer(const sp<AudioRecordThread>& thread) +nsecs_t AudioRecord::processAudioBuffer() { mLock.lock(); if (mAwaitBoost) { @@ -954,7 +954,7 @@ status_t AudioRecord::restoreRecord_l(const char *from) // ========================================================================= -void AudioRecord::DeathNotifier::binderDied(const wp<IBinder>& who) +void AudioRecord::DeathNotifier::binderDied(const wp<IBinder>& who __unused) { sp<AudioRecord> audioRecord = mAudioRecord.promote(); if (audioRecord != 0) { @@ -966,7 +966,8 @@ void AudioRecord::DeathNotifier::binderDied(const wp<IBinder>& who) // ========================================================================= AudioRecord::AudioRecordThread::AudioRecordThread(AudioRecord& receiver, bool bCanCallJava) - : Thread(bCanCallJava), mReceiver(receiver), mPaused(true), mPausedInt(false), mPausedNs(0LL) + : Thread(bCanCallJava), mReceiver(receiver), mPaused(true), mPausedInt(false), mPausedNs(0LL), + mIgnoreNextPausedInt(false) { } @@ -983,6 +984,10 @@ bool AudioRecord::AudioRecordThread::threadLoop() // caller will check for exitPending() return true; } + if (mIgnoreNextPausedInt) { + mIgnoreNextPausedInt = false; + mPausedInt = false; + } if (mPausedInt) { if (mPausedNs > 0) { (void) mMyCond.waitRelative(mMyLock, mPausedNs); @@ -993,7 +998,7 @@ bool AudioRecord::AudioRecordThread::threadLoop() return true; } } - nsecs_t ns = mReceiver.processAudioBuffer(this); + nsecs_t ns = mReceiver.processAudioBuffer(); switch (ns) { case 0: return true; @@ -1017,12 +1022,7 @@ void AudioRecord::AudioRecordThread::requestExit() { // must be in this order to avoid a race condition Thread::requestExit(); - AutoMutex _l(mMyLock); - if (mPaused || mPausedInt) { - mPaused = false; - mPausedInt = false; - mMyCond.signal(); - } + resume(); } void AudioRecord::AudioRecordThread::pause() @@ -1034,6 +1034,7 @@ void AudioRecord::AudioRecordThread::pause() void AudioRecord::AudioRecordThread::resume() { AutoMutex _l(mMyLock); + mIgnoreNextPausedInt = true; if (mPaused || mPausedInt) { mPaused = false; mPausedInt = false; diff --git a/media/libmedia/AudioSystem.cpp b/media/libmedia/AudioSystem.cpp index 8033c2c..4580030 100644 --- a/media/libmedia/AudioSystem.cpp +++ b/media/libmedia/AudioSystem.cpp @@ -40,10 +40,10 @@ audio_error_callback AudioSystem::gAudioErrorCallback = NULL; DefaultKeyedVector<audio_io_handle_t, AudioSystem::OutputDescriptor *> AudioSystem::gOutputs(0); // Cached values for recording queries, all protected by gLock -uint32_t AudioSystem::gPrevInSamplingRate = 16000; -audio_format_t AudioSystem::gPrevInFormat = AUDIO_FORMAT_PCM_16_BIT; -audio_channel_mask_t AudioSystem::gPrevInChannelMask = AUDIO_CHANNEL_IN_MONO; -size_t AudioSystem::gInBuffSize = 0; +uint32_t AudioSystem::gPrevInSamplingRate; +audio_format_t AudioSystem::gPrevInFormat; +audio_channel_mask_t AudioSystem::gPrevInChannelMask; +size_t AudioSystem::gInBuffSize = 0; // zero indicates cache is invalid // establish binder interface to AudioFlinger service @@ -190,6 +190,16 @@ String8 AudioSystem::getParameters(audio_io_handle_t ioHandle, const String8& ke return result; } +status_t AudioSystem::setParameters(const String8& keyValuePairs) +{ + return setParameters((audio_io_handle_t) 0, keyValuePairs); +} + +String8 AudioSystem::getParameters(const String8& keys) +{ + return getParameters((audio_io_handle_t) 0, keys); +} + // convert volume steps to natural log scale // change this value to change volume scaling @@ -249,6 +259,11 @@ status_t AudioSystem::getSamplingRate(audio_io_handle_t output, *samplingRate = outputDesc->samplingRate; gLock.unlock(); } + if (*samplingRate == 0) { + ALOGE("AudioSystem::getSamplingRate failed for output %d stream type %d", + output, streamType); + return BAD_VALUE; + } ALOGV("getSamplingRate() streamType %d, output %d, sampling rate %u", streamType, output, *samplingRate); @@ -289,6 +304,11 @@ status_t AudioSystem::getFrameCount(audio_io_handle_t output, *frameCount = outputDesc->frameCount; gLock.unlock(); } + if (*frameCount == 0) { + ALOGE("AudioSystem::getFrameCount failed for output %d stream type %d", + output, streamType); + return BAD_VALUE; + } ALOGV("getFrameCount() streamType %d, output %d, frameCount %d", streamType, output, *frameCount); @@ -349,6 +369,12 @@ status_t AudioSystem::getInputBufferSize(uint32_t sampleRate, audio_format_t for return PERMISSION_DENIED; } inBuffSize = af->getInputBufferSize(sampleRate, format, channelMask); + if (inBuffSize == 0) { + ALOGE("AudioSystem::getInputBufferSize failed sampleRate %d format %x channelMask %x", + sampleRate, format, channelMask); + return BAD_VALUE; + } + // A benign race is possible here: we could overwrite a fresher cache entry gLock.lock(); // save the request params gPrevInSamplingRate = sampleRate; @@ -419,7 +445,7 @@ void AudioSystem::releaseAudioSessionId(int audioSession) { // --------------------------------------------------------------------------- -void AudioSystem::AudioFlingerClient::binderDied(const wp<IBinder>& who) { +void AudioSystem::AudioFlingerClient::binderDied(const wp<IBinder>& who __unused) { Mutex::Autolock _l(AudioSystem::gLock); AudioSystem::gAudioFlinger.clear(); @@ -804,7 +830,7 @@ bool AudioSystem::isOffloadSupported(const audio_offload_info_t& info) // --------------------------------------------------------------------------- -void AudioSystem::AudioPolicyServiceClient::binderDied(const wp<IBinder>& who) { +void AudioSystem::AudioPolicyServiceClient::binderDied(const wp<IBinder>& who __unused) { Mutex::Autolock _l(AudioSystem::gLock); AudioSystem::gAudioPolicyService.clear(); diff --git a/media/libmedia/AudioTrack.cpp b/media/libmedia/AudioTrack.cpp index b48cb1f..8954d9f 100644 --- a/media/libmedia/AudioTrack.cpp +++ b/media/libmedia/AudioTrack.cpp @@ -44,9 +44,6 @@ status_t AudioTrack::getMinFrameCount( return BAD_VALUE; } - // default to 0 in case of error - *frameCount = 0; - // FIXME merge with similar code in createTrack_l(), except we're missing // some information here that is available in createTrack_l(): // audio_io_handle_t output @@ -54,16 +51,20 @@ status_t AudioTrack::getMinFrameCount( // audio_channel_mask_t channelMask // audio_output_flags_t flags uint32_t afSampleRate; - if (AudioSystem::getOutputSamplingRate(&afSampleRate, streamType) != NO_ERROR) { - return NO_INIT; + status_t status; + status = AudioSystem::getOutputSamplingRate(&afSampleRate, streamType); + if (status != NO_ERROR) { + return status; } size_t afFrameCount; - if (AudioSystem::getOutputFrameCount(&afFrameCount, streamType) != NO_ERROR) { - return NO_INIT; + status = AudioSystem::getOutputFrameCount(&afFrameCount, streamType); + if (status != NO_ERROR) { + return status; } uint32_t afLatency; - if (AudioSystem::getOutputLatency(&afLatency, streamType) != NO_ERROR) { - return NO_INIT; + status = AudioSystem::getOutputLatency(&afLatency, streamType); + if (status != NO_ERROR) { + return status; } // Ensure that buffer depth covers at least audio hardware latency @@ -74,6 +75,13 @@ status_t AudioTrack::getMinFrameCount( *frameCount = (sampleRate == 0) ? afFrameCount * minBufCount : afFrameCount * minBufCount * sampleRate / afSampleRate; + // The formula above should always produce a non-zero value, but return an error + // in the unlikely event that it does not, as that's part of the API contract. + if (*frameCount == 0) { + ALOGE("AudioTrack::getMinFrameCount failed for streamType %d, sampleRate %d", + streamType, sampleRate); + return BAD_VALUE; + } ALOGV("getMinFrameCount=%d: afFrameCount=%d, minBufCount=%d, afSampleRate=%d, afLatency=%d", *frameCount, afFrameCount, minBufCount, afSampleRate, afLatency); return NO_ERROR; @@ -237,12 +245,14 @@ status_t AudioTrack::set( streamType = AUDIO_STREAM_MUSIC; } + status_t status; if (sampleRate == 0) { - uint32_t afSampleRate; - if (AudioSystem::getOutputSamplingRate(&afSampleRate, streamType) != NO_ERROR) { - return NO_INIT; + status = AudioSystem::getOutputSamplingRate(&sampleRate, streamType); + if (status != NO_ERROR) { + ALOGE("Could not get output sample rate for stream type %d; status %d", + streamType, status); + return status; } - sampleRate = afSampleRate; } mSampleRate = sampleRate; @@ -330,7 +340,7 @@ status_t AudioTrack::set( } // create the IAudioTrack - status_t status = createTrack_l(streamType, + status = createTrack_l(streamType, sampleRate, format, frameCount, @@ -447,7 +457,7 @@ void AudioTrack::stop() return; } - if (isOffloaded()) { + if (isOffloaded_l()) { mState = STATE_STOPPING; } else { mState = STATE_STOPPED; @@ -469,7 +479,7 @@ void AudioTrack::stop() sp<AudioTrackThread> t = mAudioTrackThread; if (t != 0) { - if (!isOffloaded()) { + if (!isOffloaded_l()) { t->pause(); } } else { @@ -507,7 +517,7 @@ void AudioTrack::flush_l() mRefreshRemaining = true; mState = STATE_FLUSHED; - if (isOffloaded()) { + if (isOffloaded_l()) { mProxy->interrupt(); } mProxy->flush(); @@ -540,7 +550,7 @@ status_t AudioTrack::setVolume(float left, float right) mProxy->setVolumeLR((uint32_t(uint16_t(right * 0x1000)) << 16) | uint16_t(left * 0x1000)); - if (isOffloaded()) { + if (isOffloaded_l()) { mAudioTrack->signal(); } return NO_ERROR; @@ -604,7 +614,7 @@ uint32_t AudioTrack::getSampleRate() const // sample rate can be updated during playback by the offloaded decoder so we need to // query the HAL and update if needed. // FIXME use Proxy return channel to update the rate from server and avoid polling here - if (isOffloaded()) { + if (isOffloaded_l()) { if (mOutput != 0) { uint32_t sampleRate = 0; status_t status = AudioSystem::getSamplingRate(mOutput, mStreamType, &sampleRate); @@ -741,7 +751,7 @@ status_t AudioTrack::getPosition(uint32_t *position) const } AutoMutex lock(mLock); - if (isOffloaded()) { + if (isOffloaded_l()) { uint32_t dspFrames = 0; if (mOutput != 0) { @@ -1341,7 +1351,7 @@ status_t TimedAudioTrack::setMediaTimeTransform(const LinearTransform& xform, // ------------------------------------------------------------------------- -nsecs_t AudioTrack::processAudioBuffer(const sp<AudioTrackThread>& thread) +nsecs_t AudioTrack::processAudioBuffer() { // Currently the AudioTrack thread is not created if there are no callbacks. // Would it ever make sense to run the thread, even without callbacks? @@ -1379,7 +1389,7 @@ nsecs_t AudioTrack::processAudioBuffer(const sp<AudioTrackThread>& thread) // for offloaded tracks restoreTrack_l() will just update the sequence and clear // AudioSystem cache. We should not exit here but after calling the callback so // that the upper layers can recreate the track - if (!isOffloaded() || (mSequence == mObservedSequence)) { + if (!isOffloaded_l() || (mSequence == mObservedSequence)) { status_t status = restoreTrack_l("processAudioBuffer"); mLock.unlock(); // Run again immediately, but with a new IAudioTrack @@ -1666,7 +1676,7 @@ nsecs_t AudioTrack::processAudioBuffer(const sp<AudioTrackThread>& thread) status_t AudioTrack::restoreTrack_l(const char *from) { ALOGW("dead IAudioTrack, %s, creating a new one from %s()", - isOffloaded() ? "Offloaded" : "PCM", from); + isOffloaded_l() ? "Offloaded" : "PCM", from); ++mSequence; status_t result; @@ -1674,7 +1684,8 @@ status_t AudioTrack::restoreTrack_l(const char *from) // output parameters in getOutput_l() and createTrack_l() AudioSystem::clearAudioConfigCache(); - if (isOffloaded()) { + if (isOffloaded_l()) { + // FIXME re-creation of offloaded tracks is not yet implemented return DEAD_OBJECT; } @@ -1760,14 +1771,21 @@ status_t AudioTrack::getTimestamp(AudioTimestamp& timestamp) String8 AudioTrack::getParameters(const String8& keys) { - if (mOutput) { - return AudioSystem::getParameters(mOutput, keys); + audio_io_handle_t output = getOutput(); + if (output != 0) { + return AudioSystem::getParameters(output, keys); } else { return String8::empty(); } } -status_t AudioTrack::dump(int fd, const Vector<String16>& args) const +bool AudioTrack::isOffloaded() const +{ + AutoMutex lock(mLock); + return isOffloaded_l(); +} + +status_t AudioTrack::dump(int fd, const Vector<String16>& args __unused) const { const size_t SIZE = 256; @@ -1797,7 +1815,7 @@ uint32_t AudioTrack::getUnderrunFrames() const // ========================================================================= -void AudioTrack::DeathNotifier::binderDied(const wp<IBinder>& who) +void AudioTrack::DeathNotifier::binderDied(const wp<IBinder>& who __unused) { sp<AudioTrack> audioTrack = mAudioTrack.promote(); if (audioTrack != 0) { @@ -1841,7 +1859,7 @@ bool AudioTrack::AudioTrackThread::threadLoop() return true; } } - nsecs_t ns = mReceiver.processAudioBuffer(this); + nsecs_t ns = mReceiver.processAudioBuffer(); switch (ns) { case 0: return true; diff --git a/media/libmedia/AudioTrackShared.cpp b/media/libmedia/AudioTrackShared.cpp index caa7900..7a1e207 100644 --- a/media/libmedia/AudioTrackShared.cpp +++ b/media/libmedia/AudioTrackShared.cpp @@ -765,7 +765,7 @@ ssize_t StaticAudioTrackServerProxy::pollPosition() return (ssize_t) position; } -status_t StaticAudioTrackServerProxy::obtainBuffer(Buffer* buffer, bool ackFlush) +status_t StaticAudioTrackServerProxy::obtainBuffer(Buffer* buffer, bool ackFlush __unused) { if (mIsShutdown) { buffer->mFrameCount = 0; @@ -847,7 +847,7 @@ void StaticAudioTrackServerProxy::releaseBuffer(Buffer* buffer) buffer->mNonContig = 0; } -void StaticAudioTrackServerProxy::tallyUnderrunFrames(uint32_t frameCount) +void StaticAudioTrackServerProxy::tallyUnderrunFrames(uint32_t frameCount __unused) { // Unlike AudioTrackServerProxy::tallyUnderrunFrames() used for streaming tracks, // we don't have a location to count underrun frames. The underrun frame counter diff --git a/media/libmedia/IMediaDeathNotifier.cpp b/media/libmedia/IMediaDeathNotifier.cpp index 9db5b1b..10b4934 100644 --- a/media/libmedia/IMediaDeathNotifier.cpp +++ b/media/libmedia/IMediaDeathNotifier.cpp @@ -75,7 +75,7 @@ IMediaDeathNotifier::removeObitRecipient(const wp<IMediaDeathNotifier>& recipien } void -IMediaDeathNotifier::DeathNotifier::binderDied(const wp<IBinder>& who) { +IMediaDeathNotifier::DeathNotifier::binderDied(const wp<IBinder>& who __unused) { ALOGW("media server died"); // Need to do this with the lock held diff --git a/media/libmedia/SoundPool.cpp b/media/libmedia/SoundPool.cpp index b420c95..98acd1f 100644 --- a/media/libmedia/SoundPool.cpp +++ b/media/libmedia/SoundPool.cpp @@ -199,7 +199,7 @@ SoundChannel* SoundPool::findNextChannel(int channelID) return NULL; } -int SoundPool::load(const char* path, int priority) +int SoundPool::load(const char* path, int priority __unused) { ALOGV("load: path=%s, priority=%d", path, priority); Mutex::Autolock lock(&mLock); @@ -209,7 +209,7 @@ int SoundPool::load(const char* path, int priority) return sample->sampleID(); } -int SoundPool::load(int fd, int64_t offset, int64_t length, int priority) +int SoundPool::load(int fd, int64_t offset, int64_t length, int priority __unused) { ALOGV("load: fd=%d, offset=%lld, length=%lld, priority=%d", fd, offset, length, priority); diff --git a/media/libmedia/mediametadataretriever.cpp b/media/libmedia/mediametadataretriever.cpp index 110b94c..bad2494 100644 --- a/media/libmedia/mediametadataretriever.cpp +++ b/media/libmedia/mediametadataretriever.cpp @@ -157,7 +157,7 @@ sp<IMemory> MediaMetadataRetriever::extractAlbumArt() return mRetriever->extractAlbumArt(); } -void MediaMetadataRetriever::DeathNotifier::binderDied(const wp<IBinder>& who) { +void MediaMetadataRetriever::DeathNotifier::binderDied(const wp<IBinder>& who __unused) { Mutex::Autolock lock(MediaMetadataRetriever::sServiceLock); MediaMetadataRetriever::sService.clear(); ALOGW("MediaMetadataRetriever server died!"); diff --git a/media/libmedia/mediaplayer.cpp b/media/libmedia/mediaplayer.cpp index 0f6d897..7a6f31d 100644 --- a/media/libmedia/mediaplayer.cpp +++ b/media/libmedia/mediaplayer.cpp @@ -654,7 +654,7 @@ status_t MediaPlayer::setRetransmitEndpoint(const char* addrString, return BAD_VALUE; } - memset(&mRetransmitEndpoint, 0, sizeof(&mRetransmitEndpoint)); + memset(&mRetransmitEndpoint, 0, sizeof(mRetransmitEndpoint)); mRetransmitEndpoint.sin_family = AF_INET; mRetransmitEndpoint.sin_addr = saddr; mRetransmitEndpoint.sin_port = htons(port); diff --git a/media/libnbaio/MonoPipe.cpp b/media/libnbaio/MonoPipe.cpp index de0ad28..3c61b60 100644 --- a/media/libnbaio/MonoPipe.cpp +++ b/media/libnbaio/MonoPipe.cpp @@ -183,7 +183,7 @@ ssize_t MonoPipe::write(const void *buffer, size_t count) } } if (ns > 0) { - const struct timespec req = {0, ns}; + const struct timespec req = {0, static_cast<long>(ns)}; nanosleep(&req, NULL); } // record the time that this write() completed diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp index 9b36b6a..9c89e82 100644 --- a/media/libstagefright/MPEG4Extractor.cpp +++ b/media/libstagefright/MPEG4Extractor.cpp @@ -2372,6 +2372,58 @@ status_t MPEG4Extractor::verifyTrack(Track *track) { return OK; } +typedef enum { + //AOT_NONE = -1, + //AOT_NULL_OBJECT = 0, + //AOT_AAC_MAIN = 1, /**< Main profile */ + AOT_AAC_LC = 2, /**< Low Complexity object */ + //AOT_AAC_SSR = 3, + //AOT_AAC_LTP = 4, + AOT_SBR = 5, + //AOT_AAC_SCAL = 6, + //AOT_TWIN_VQ = 7, + //AOT_CELP = 8, + //AOT_HVXC = 9, + //AOT_RSVD_10 = 10, /**< (reserved) */ + //AOT_RSVD_11 = 11, /**< (reserved) */ + //AOT_TTSI = 12, /**< TTSI Object */ + //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */ + //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */ + //AOT_GEN_MIDI = 15, /**< General MIDI object */ + //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */ + AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */ + //AOT_RSVD_18 = 18, /**< (reserved) */ + //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */ + AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */ + //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */ + AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */ + AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */ + //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */ + //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */ + //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */ + //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */ + //AOT_RSVD_28 = 28, /**< might become SSC */ + AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */ + //AOT_MPEGS = 30, /**< MPEG Surround */ + + AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */ + + //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */ + //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */ + //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */ + //AOT_RSVD_35 = 35, /**< might become DST */ + //AOT_RSVD_36 = 36, /**< might become ALS */ + //AOT_AAC_SLS = 37, /**< AAC + SLS */ + //AOT_SLS = 38, /**< SLS */ + //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */ + + //AOT_USAC = 42, /**< USAC */ + //AOT_SAOC = 43, /**< SAOC */ + //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */ + + //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */ +} AUDIO_OBJECT_TYPE; + status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( const void *esds_data, size_t esds_size) { ESDS esds(esds_data, esds_size); @@ -2419,6 +2471,11 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( return ERROR_MALFORMED; } + static uint32_t kSamplingRate[] = { + 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, + 16000, 12000, 11025, 8000, 7350 + }; + ABitReader br(csd, csd_size); uint32_t objectType = br.getBits(5); @@ -2426,6 +2483,9 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( objectType = 32 + br.getBits(6); } + //keep AOT type + mLastTrack->meta->setInt32(kKeyAACAOT, objectType); + uint32_t freqIndex = br.getBits(4); int32_t sampleRate = 0; @@ -2438,28 +2498,134 @@ status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio( numChannels = br.getBits(4); } else { numChannels = br.getBits(4); - if (objectType == 5) { - // SBR specific config per 14496-3 table 1.13 - freqIndex = br.getBits(4); - if (freqIndex == 15) { - if (csd_size < 8) { - return ERROR_MALFORMED; - } - sampleRate = br.getBits(24); - } + + if (freqIndex == 13 || freqIndex == 14) { + return ERROR_MALFORMED; } - if (sampleRate == 0) { - static uint32_t kSamplingRate[] = { - 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, - 16000, 12000, 11025, 8000, 7350 - }; + sampleRate = kSamplingRate[freqIndex]; + } - if (freqIndex == 13 || freqIndex == 14) { + if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13 + uint32_t extFreqIndex = br.getBits(4); + int32_t extSampleRate; + if (extFreqIndex == 15) { + if (csd_size < 8) { + return ERROR_MALFORMED; + } + extSampleRate = br.getBits(24); + } else { + if (extFreqIndex == 13 || extFreqIndex == 14) { return ERROR_MALFORMED; } + extSampleRate = kSamplingRate[extFreqIndex]; + } + //TODO: save the extension sampling rate value in meta data => + // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate); + } + + switch (numChannels) { + // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration + case 0: + case 1:// FC + case 2:// FL FR + case 3:// FC, FL FR + case 4:// FC, FL FR, RC + case 5:// FC, FL FR, SL SR + case 6:// FC, FL FR, SL SR, LFE + //numChannels already contains the right value + break; + case 11:// FC, FL FR, SL SR, RC, LFE + numChannels = 7; + break; + case 7: // FC, FCL FCR, FL FR, SL SR, LFE + case 12:// FC, FL FR, SL SR, RL RR, LFE + case 14:// FC, FL FR, SL SR, LFE, FHL FHR + numChannels = 8; + break; + default: + return ERROR_UNSUPPORTED; + } + + { + if (objectType == AOT_SBR || objectType == AOT_PS) { + const int32_t extensionSamplingFrequency = br.getBits(4); + objectType = br.getBits(5); + + if (objectType == AOT_ESCAPE) { + objectType = 32 + br.getBits(6); + } + } + if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC || + objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL || + objectType == AOT_ER_BSAC) { + const int32_t frameLengthFlag = br.getBits(1); + + const int32_t dependsOnCoreCoder = br.getBits(1); + + if (dependsOnCoreCoder ) { + const int32_t coreCoderDelay = br.getBits(14); + } + + const int32_t extensionFlag = br.getBits(1); + + if (numChannels == 0 ) { + int32_t channelsEffectiveNum = 0; + int32_t channelsNum = 0; + const int32_t ElementInstanceTag = br.getBits(4); + const int32_t Profile = br.getBits(2); + const int32_t SamplingFrequencyIndex = br.getBits(4); + const int32_t NumFrontChannelElements = br.getBits(4); + const int32_t NumSideChannelElements = br.getBits(4); + const int32_t NumBackChannelElements = br.getBits(4); + const int32_t NumLfeChannelElements = br.getBits(2); + const int32_t NumAssocDataElements = br.getBits(3); + const int32_t NumValidCcElements = br.getBits(4); + + const int32_t MonoMixdownPresent = br.getBits(1); + if (MonoMixdownPresent != 0) { + const int32_t MonoMixdownElementNumber = br.getBits(4); + } + + const int32_t StereoMixdownPresent = br.getBits(1); + if (StereoMixdownPresent != 0) { + const int32_t StereoMixdownElementNumber = br.getBits(4); + } + + const int32_t MatrixMixdownIndexPresent = br.getBits(1); + if (MatrixMixdownIndexPresent != 0) { + const int32_t MatrixMixdownIndex = br.getBits(2); + const int32_t PseudoSurroundEnable = br.getBits(1); + } - sampleRate = kSamplingRate[freqIndex]; + int i; + for (i=0; i < NumFrontChannelElements; i++) { + const int32_t FrontElementIsCpe = br.getBits(1); + const int32_t FrontElementTagSelect = br.getBits(4); + channelsNum += FrontElementIsCpe ? 2 : 1; + } + + for (i=0; i < NumSideChannelElements; i++) { + const int32_t SideElementIsCpe = br.getBits(1); + const int32_t SideElementTagSelect = br.getBits(4); + channelsNum += SideElementIsCpe ? 2 : 1; + } + + for (i=0; i < NumBackChannelElements; i++) { + const int32_t BackElementIsCpe = br.getBits(1); + const int32_t BackElementTagSelect = br.getBits(4); + channelsNum += BackElementIsCpe ? 2 : 1; + } + channelsEffectiveNum = channelsNum; + + for (i=0; i < NumLfeChannelElements; i++) { + const int32_t LfeElementTagSelect = br.getBits(4); + channelsNum += 1; + } + ALOGV("mpeg4 audio channelsNum = %d", channelsNum); + ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum); + numChannels = channelsNum; + } } } diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp index 9041c21..216a329 100644 --- a/media/libstagefright/Utils.cpp +++ b/media/libstagefright/Utils.cpp @@ -562,6 +562,17 @@ bool canOffloadStream(const sp<MetaData>& meta, bool hasVideo, return false; } + // check whether it is ELD/LD content -> no offloading + // FIXME: this should depend on audio DSP capabilities. mapMimeToAudioFormat() should use the + // metadata to refine the AAC format and the audio HAL should only list supported profiles. + int32_t aacaot = -1; + if (meta->findInt32(kKeyAACAOT, &aacaot)) { + if (aacaot == 23 || aacaot == 39 ) { + ALOGV("track of type '%s' is ELD/LD content", mime); + return false; + } + } + int32_t srate = -1; if (!meta->findInt32(kKeySampleRate, &srate)) { ALOGV("track of type '%s' does not publish sample rate", mime); diff --git a/media/libstagefright/codecs/aacdec/SoftAAC2.cpp b/media/libstagefright/codecs/aacdec/SoftAAC2.cpp index f842e27..2f5eff4 100644 --- a/media/libstagefright/codecs/aacdec/SoftAAC2.cpp +++ b/media/libstagefright/codecs/aacdec/SoftAAC2.cpp @@ -30,7 +30,7 @@ #define DRC_DEFAULT_MOBILE_REF_LEVEL 64 /* 64*-0.25dB = -16 dB below full scale for mobile conf */ #define DRC_DEFAULT_MOBILE_DRC_CUT 127 /* maximum compression of dynamic range for mobile conf */ #define DRC_DEFAULT_MOBILE_DRC_BOOST 127 /* maximum compression of dynamic range for mobile conf */ -#define MAX_CHANNEL_COUNT 6 /* maximum number of audio channels that can be decoded */ +#define MAX_CHANNEL_COUNT 8 /* maximum number of audio channels that can be decoded */ // names of properties that can be used to override the default DRC settings #define PROP_DRC_OVERRIDE_REF_LEVEL "aac_drc_reference_level" #define PROP_DRC_OVERRIDE_CUT "aac_drc_cut" @@ -296,8 +296,11 @@ void SoftAAC2::maybeConfigureDownmix() const { if (!(property_get("media.aac_51_output_enabled", value, NULL) && (!strcmp(value, "1") || !strcasecmp(value, "true")))) { ALOGI("Downmixing multichannel AAC to stereo"); - aacDecoder_SetParam(mAACDecoder, AAC_PCM_OUTPUT_CHANNELS, 2); + aacDecoder_SetParam(mAACDecoder, AAC_PCM_MAX_OUTPUT_CHANNELS, 2); mStreamInfo->numChannels = 2; + // By default, the decoder creates a 5.1 channel downmix signal + // for seven and eight channel input streams. To enable 6.1 and 7.1 channel output + // use aacDecoder_SetParam(mAACDecoder, AAC_PCM_MAX_OUTPUT_CHANNELS, -1) } } } diff --git a/media/libstagefright/httplive/M3UParser.cpp b/media/libstagefright/httplive/M3UParser.cpp index 243888c..dd248cb 100644 --- a/media/libstagefright/httplive/M3UParser.cpp +++ b/media/libstagefright/httplive/M3UParser.cpp @@ -608,7 +608,7 @@ status_t M3UParser::parseMetaDataDuration( if (meta->get() == NULL) { *meta = new AMessage; } - (*meta)->setInt64(key, (int64_t)x * 1E6); + (*meta)->setInt64(key, (int64_t)(x * 1E6)); return OK; } diff --git a/media/libstagefright/httplive/PlaylistFetcher.cpp b/media/libstagefright/httplive/PlaylistFetcher.cpp index 1754bf2..f095987 100644 --- a/media/libstagefright/httplive/PlaylistFetcher.cpp +++ b/media/libstagefright/httplive/PlaylistFetcher.cpp @@ -861,12 +861,13 @@ status_t PlaylistFetcher::extractAndQueueAccessUnits( && source->dequeueAccessUnit(&accessUnit) == OK) { // Note that we do NOT dequeue any discontinuities. + // for simplicity, store a reference to the format in each unit + sp<MetaData> format = source->getFormat(); + if (format != NULL) { + accessUnit->meta()->setObject("format", format); + } packetSource->queueAccessUnit(accessUnit); } - - if (packetSource->getFormat() == NULL) { - packetSource->setFormat(source->getFormat()); - } } return OK; diff --git a/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp b/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp index 3153c8b..52fb2a5 100644 --- a/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp +++ b/media/libstagefright/mpeg2ts/AnotherPacketSource.cpp @@ -70,7 +70,27 @@ status_t AnotherPacketSource::stop() { } sp<MetaData> AnotherPacketSource::getFormat() { - return mFormat; + Mutex::Autolock autoLock(mLock); + if (mFormat != NULL) { + return mFormat; + } + + List<sp<ABuffer> >::iterator it = mBuffers.begin(); + while (it != mBuffers.end()) { + sp<ABuffer> buffer = *it; + int32_t discontinuity; + if (buffer->meta()->findInt32("discontinuity", &discontinuity)) { + break; + } + + sp<RefBase> object; + if (buffer->meta()->findObject("format", &object)) { + return static_cast<MetaData*>(object.get()); + } + + ++it; + } + return NULL; } status_t AnotherPacketSource::dequeueAccessUnit(sp<ABuffer> *buffer) { @@ -94,6 +114,11 @@ status_t AnotherPacketSource::dequeueAccessUnit(sp<ABuffer> *buffer) { return INFO_DISCONTINUITY; } + sp<RefBase> object; + if ((*buffer)->meta()->findObject("format", &object)) { + mFormat = static_cast<MetaData*>(object.get()); + } + return OK; } @@ -120,17 +145,22 @@ status_t AnotherPacketSource::read( } return INFO_DISCONTINUITY; - } else { - int64_t timeUs; - CHECK(buffer->meta()->findInt64("timeUs", &timeUs)); + } - MediaBuffer *mediaBuffer = new MediaBuffer(buffer); + sp<RefBase> object; + if (buffer->meta()->findObject("format", &object)) { + mFormat = static_cast<MetaData*>(object.get()); + } - mediaBuffer->meta_data()->setInt64(kKeyTime, timeUs); + int64_t timeUs; + CHECK(buffer->meta()->findInt64("timeUs", &timeUs)); - *out = mediaBuffer; - return OK; - } + MediaBuffer *mediaBuffer = new MediaBuffer(buffer); + + mediaBuffer->meta_data()->setInt64(kKeyTime, timeUs); + + *out = mediaBuffer; + return OK; } return mEOSResult; diff --git a/services/audioflinger/Android.mk b/services/audioflinger/Android.mk index 3ec9285..4524d3c 100644 --- a/services/audioflinger/Android.mk +++ b/services/audioflinger/Android.mk @@ -23,7 +23,8 @@ LOCAL_SRC_FILES:= \ AudioPolicyService.cpp \ ServiceUtilities.cpp \ AudioResamplerCubic.cpp.arm \ - AudioResamplerSinc.cpp.arm + AudioResamplerSinc.cpp.arm \ + AudioResamplerDyn.cpp.arm LOCAL_SRC_FILES += StateQueue.cpp @@ -74,10 +75,11 @@ include $(BUILD_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_SRC_FILES:= \ - test-resample.cpp \ + test-resample.cpp \ AudioResampler.cpp.arm \ - AudioResamplerCubic.cpp.arm \ - AudioResamplerSinc.cpp.arm + AudioResamplerCubic.cpp.arm \ + AudioResamplerSinc.cpp.arm \ + AudioResamplerDyn.cpp.arm LOCAL_C_INCLUDES := \ $(call include-path-for, audio-utils) diff --git a/services/audioflinger/AudioPolicyService.cpp b/services/audioflinger/AudioPolicyService.cpp index c5ad2c0..c8f0730 100644 --- a/services/audioflinger/AudioPolicyService.cpp +++ b/services/audioflinger/AudioPolicyService.cpp @@ -1441,6 +1441,14 @@ status_t AudioPolicyService::loadPreProcessorConfig(const char *path) loadEffects(root, effects); loadInputSources(root, effects); + // delete effects to fix memory leak. + // as effects is local var and valgrind would treat this as memory leak + // and although it only did in mediaserver init, but free it in case mediaserver reboot + size_t i; + for (i = 0; i < effects.size(); i++) { + delete effects[i]; + } + config_free(root); free(root); free(data); diff --git a/services/audioflinger/AudioResampler.cpp b/services/audioflinger/AudioResampler.cpp index 323f1a4..3b5a8c1 100644 --- a/services/audioflinger/AudioResampler.cpp +++ b/services/audioflinger/AudioResampler.cpp @@ -25,6 +25,7 @@ #include "AudioResampler.h" #include "AudioResamplerSinc.h" #include "AudioResamplerCubic.h" +#include "AudioResamplerDyn.h" #ifdef __arm__ #include <machine/cpu-features.h> @@ -85,6 +86,9 @@ bool AudioResampler::qualityIsSupported(src_quality quality) case MED_QUALITY: case HIGH_QUALITY: case VERY_HIGH_QUALITY: + case DYN_LOW_QUALITY: + case DYN_MED_QUALITY: + case DYN_HIGH_QUALITY: return true; default: return false; @@ -105,7 +109,7 @@ void AudioResampler::init_routine() if (*endptr == '\0') { defaultQuality = (src_quality) l; ALOGD("forcing AudioResampler quality to %d", defaultQuality); - if (defaultQuality < DEFAULT_QUALITY || defaultQuality > VERY_HIGH_QUALITY) { + if (defaultQuality < DEFAULT_QUALITY || defaultQuality > DYN_HIGH_QUALITY) { defaultQuality = DEFAULT_QUALITY; } } @@ -125,6 +129,12 @@ uint32_t AudioResampler::qualityMHz(src_quality quality) return 20; case VERY_HIGH_QUALITY: return 34; + case DYN_LOW_QUALITY: + return 4; + case DYN_MED_QUALITY: + return 6; + case DYN_HIGH_QUALITY: + return 12; } } @@ -175,6 +185,15 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, case VERY_HIGH_QUALITY: quality = HIGH_QUALITY; break; + case DYN_LOW_QUALITY: + atFinalQuality = true; + break; + case DYN_MED_QUALITY: + quality = DYN_LOW_QUALITY; + break; + case DYN_HIGH_QUALITY: + quality = DYN_MED_QUALITY; + break; } } pthread_mutex_unlock(&mutex); @@ -200,6 +219,12 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, ALOGV("Create VERY_HIGH_QUALITY sinc Resampler = %d", quality); resampler = new AudioResamplerSinc(bitDepth, inChannelCount, sampleRate, quality); break; + case DYN_LOW_QUALITY: + case DYN_MED_QUALITY: + case DYN_HIGH_QUALITY: + ALOGV("Create dynamic Resampler = %d", quality); + resampler = new AudioResamplerDyn(bitDepth, inChannelCount, sampleRate, quality); + break; } // initialize resampler diff --git a/services/audioflinger/AudioResampler.h b/services/audioflinger/AudioResampler.h index 33e64ce..c341325 100644 --- a/services/audioflinger/AudioResampler.h +++ b/services/audioflinger/AudioResampler.h @@ -41,6 +41,9 @@ public: MED_QUALITY=2, HIGH_QUALITY=3, VERY_HIGH_QUALITY=4, + DYN_LOW_QUALITY=5, + DYN_MED_QUALITY=6, + DYN_HIGH_QUALITY=7, }; static AudioResampler* create(int bitDepth, int inChannelCount, diff --git a/services/audioflinger/AudioResamplerDyn.cpp b/services/audioflinger/AudioResamplerDyn.cpp new file mode 100644 index 0000000..984548d --- /dev/null +++ b/services/audioflinger/AudioResamplerDyn.cpp @@ -0,0 +1,551 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "AudioResamplerDyn" +//#define LOG_NDEBUG 0 + +#include <malloc.h> +#include <string.h> +#include <stdlib.h> +#include <dlfcn.h> +#include <math.h> + +#include <cutils/compiler.h> +#include <cutils/properties.h> +#include <utils/Log.h> + +#include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here +#include "AudioResamplerFirProcess.h" +#include "AudioResamplerFirProcessNeon.h" +#include "AudioResamplerFirGen.h" // requires math.h +#include "AudioResamplerDyn.h" + +//#define DEBUG_RESAMPLER + +namespace android { + +// generate a unique resample type compile-time constant (constexpr) +#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \ + ((((CHANNELS)-1)&1) | !!(LOCKED)<<1 | (COEFTYPE)<<2 \ + | ((STRIDE)==8 ? 1 : (STRIDE)==16 ? 2 : 0)<<3) + +/* + * InBuffer is a type agnostic input buffer. + * + * Layout of the state buffer for halfNumCoefs=8. + * + * [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr] + * S I R + * + * S = mState + * I = mImpulse + * R = mRingFull + * p = past samples, convoluted with the (p)ositive side of sinc() + * n = future samples, convoluted with the (n)egative side of sinc() + * r = extra space for implementing the ring buffer + */ + +template<typename TI> +AudioResamplerDyn::InBuffer<TI>::InBuffer() + : mState(NULL), mImpulse(NULL), mRingFull(NULL), mStateSize(0) { +} + +template<typename TI> +AudioResamplerDyn::InBuffer<TI>::~InBuffer() { + init(); +} + +template<typename TI> +void AudioResamplerDyn::InBuffer<TI>::init() { + free(mState); + mState = NULL; + mImpulse = NULL; + mRingFull = NULL; + mStateSize = 0; +} + +// resizes the state buffer to accommodate the appropriate filter length +template<typename TI> +void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) { + // calculate desired state size + int stateSize = halfNumCoefs * CHANNELS * 2 + * kStateSizeMultipleOfFilterLength; + + // check if buffer needs resizing + if (mState + && stateSize == mStateSize + && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) { + return; + } + + // create new buffer + TI* state = (int16_t*)memalign(32, stateSize*sizeof(*state)); + memset(state, 0, stateSize*sizeof(*state)); + + // attempt to preserve state + if (mState) { + TI* srcLo = mImpulse - halfNumCoefs*CHANNELS; + TI* srcHi = mImpulse + halfNumCoefs*CHANNELS; + TI* dst = state; + + if (srcLo < mState) { + dst += mState-srcLo; + srcLo = mState; + } + if (srcHi > mState + mStateSize) { + srcHi = mState + mStateSize; + } + memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo)); + free(mState); + } + + // set class member vars + mState = state; + mStateSize = stateSize; + mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed + mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS; +} + +// copy in the input data into the head (impulse+halfNumCoefs) of the buffer. +template<typename TI> +template<int CHANNELS> +void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex) { + int16_t* head = impulse + halfNumCoefs*CHANNELS; + for (size_t i=0 ; i<CHANNELS ; i++) { + head[i] = in[inputIndex*CHANNELS + i]; + } +} + +// advance the impulse pointer, and load in data into the head (impulse+halfNumCoefs) +template<typename TI> +template<int CHANNELS> +void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex) { + impulse += CHANNELS; + + if (CC_UNLIKELY(impulse >= mRingFull)) { + const size_t shiftDown = mRingFull - mState - halfNumCoefs*CHANNELS; + memcpy(mState, mState+shiftDown, halfNumCoefs*CHANNELS*2*sizeof(TI)); + impulse -= shiftDown; + } + readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex); +} + +void AudioResamplerDyn::Constants::set( + int L, int halfNumCoefs, int inSampleRate, int outSampleRate) +{ + int bits = 0; + int lscale = inSampleRate/outSampleRate < 2 ? L - 1 : + static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate); + for (int i=lscale; i; ++bits, i>>=1) + ; + mL = L; + mShift = kNumPhaseBits - bits; + mHalfNumCoefs = halfNumCoefs; +} + +AudioResamplerDyn::AudioResamplerDyn(int bitDepth, + int inChannelCount, int32_t sampleRate, src_quality quality) + : AudioResampler(bitDepth, inChannelCount, sampleRate, quality), + mResampleType(0), mFilterSampleRate(0), mFilterQuality(DEFAULT_QUALITY), + mCoefBuffer(NULL) +{ + mVolumeSimd[0] = mVolumeSimd[1] = 0; + mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better +} + +AudioResamplerDyn::~AudioResamplerDyn() { + free(mCoefBuffer); +} + +void AudioResamplerDyn::init() { + mFilterSampleRate = 0; // always trigger new filter generation + mInBuffer.init(); +} + +void AudioResamplerDyn::setVolume(int16_t left, int16_t right) { + AudioResampler::setVolume(left, right); + mVolumeSimd[0] = static_cast<int32_t>(left)<<16; + mVolumeSimd[1] = static_cast<int32_t>(right)<<16; +} + +template <typename T> T max(T a, T b) {return a > b ? a : b;} + +template <typename T> T absdiff(T a, T b) {return a > b ? a - b : b - a;} + +template<typename T> +void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten, + int inSampleRate, int outSampleRate, double tbwCheat) { + T* buf = reinterpret_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T))); + static const double atten = 0.9998; // to avoid ripple overflow + double fcr; + double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten); + + if (inSampleRate < outSampleRate) { // upsample + fcr = max(0.5*tbwCheat - tbw/2, tbw/2); + } else { // downsample + fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2); + } + // create and set filter + firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten); + c.setBuf(buf); + if (mCoefBuffer) { + free(mCoefBuffer); + } + mCoefBuffer = buf; +#ifdef DEBUG_RESAMPLER + // print basic filter stats + printf("L:%d hnc:%d stopBandAtten:%lf fcr:%lf atten:%lf tbw:%lf\n", + c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw); + // test the filter and report results + double fp = (fcr - tbw/2)/c.mL; + double fs = (fcr + tbw/2)/c.mL; + double passMin, passMax, passRipple; + double stopMax, stopRipple; + testFir(buf, c.mL, c.mHalfNumCoefs, fp, fs, /*passSteps*/ 1000, /*stopSteps*/ 100000, + passMin, passMax, passRipple, stopMax, stopRipple); + printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, passMin, passMax, passRipple); + printf("stopband(%lf, %lf): %.8lf %.3lf\n", fs, 0.5, stopMax, stopRipple); +#endif +} + +// recursive gcd. Using objdump, it appears the tail recursion is converted to a while loop. +static int gcd(int n, int m) { + if (m == 0) { + return n; + } + return gcd(m, n % m); +} + +static bool isClose(int32_t newSampleRate, int32_t prevSampleRate, + int32_t filterSampleRate, int32_t outSampleRate) { + + // different upsampling ratios do not need a filter change. + if (filterSampleRate != 0 + && filterSampleRate < outSampleRate + && newSampleRate < outSampleRate) + return true; + + // check design criteria again if downsampling is detected. + int pdiff = absdiff(newSampleRate, prevSampleRate); + int adiff = absdiff(newSampleRate, filterSampleRate); + + // allow up to 6% relative change increments. + // allow up to 12% absolute change increments (from filter design) + return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3; +} + +void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) { + if (mInSampleRate == inSampleRate) { + return; + } + int32_t oldSampleRate = mInSampleRate; + int32_t oldHalfNumCoefs = mConstants.mHalfNumCoefs; + uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift; + bool useS32 = false; + + mInSampleRate = inSampleRate; + + // TODO: Add precalculated Equiripple filters + + if (mFilterQuality != getQuality() || + !isClose(inSampleRate, oldSampleRate, mFilterSampleRate, mSampleRate)) { + mFilterSampleRate = inSampleRate; + mFilterQuality = getQuality(); + + // Begin Kaiser Filter computation + // + // The quantization floor for S16 is about 96db - 10*log_10(#length) + 3dB. + // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters + // + // For s32 we keep the stop band attenuation at the same as 16b resolution, about + // 96-98dB + // + + double stopBandAtten; + double tbwCheat = 1.; // how much we "cheat" into aliasing + int halfLength; + if (mFilterQuality == DYN_HIGH_QUALITY) { + // 32b coefficients, 64 length + useS32 = true; + stopBandAtten = 98.; + halfLength = 32; + } else if (mFilterQuality == DYN_LOW_QUALITY) { + // 16b coefficients, 16-32 length + useS32 = false; + stopBandAtten = 80.; + if (mSampleRate >= inSampleRate * 2) { + halfLength = 16; + } else { + halfLength = 8; + } + if (mSampleRate >= inSampleRate) { + tbwCheat = 1.05; + } else { + tbwCheat = 1.03; + } + } else { // DYN_MED_QUALITY + // 16b coefficients, 32-64 length + // note: > 64 length filters with 16b coefs can have quantization noise problems + useS32 = false; + stopBandAtten = 84.; + if (mSampleRate >= inSampleRate * 4) { + halfLength = 32; + } else if (mSampleRate >= inSampleRate * 2) { + halfLength = 24; + } else { + halfLength = 16; + } + if (mSampleRate >= inSampleRate) { + tbwCheat = 1.03; + } else { + tbwCheat = 1.01; + } + } + + // determine the number of polyphases in the filterbank. + // for 16b, it is desirable to have 2^(16/2) = 256 phases. + // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html + // + // We are a bit more lax on this. + + int phases = mSampleRate / gcd(mSampleRate, inSampleRate); + + // TODO: Once dynamic sample rate change is an option, the code below + // should be modified to execute only when dynamic sample rate change is enabled. + // + // as above, #phases less than 63 is too few phases for accurate linear interpolation. + // we increase the phases to compensate, but more phases means more memory per + // filter and more time to compute the filter. + // + // if we know that the filter will be used for dynamic sample rate changes, + // that would allow us skip this part for fixed sample rate resamplers. + // + while (phases<63) { + phases *= 2; // this code only needed to support dynamic rate changes + } + + if (phases>=256) { // too many phases, always interpolate + phases = 127; + } + + // create the filter + mConstants.set(phases, halfLength, inSampleRate, mSampleRate); + if (useS32) { + createKaiserFir<int32_t>(mConstants, stopBandAtten, + inSampleRate, mSampleRate, tbwCheat); + } else { + createKaiserFir<int16_t>(mConstants, stopBandAtten, + inSampleRate, mSampleRate, tbwCheat); + } + } // End Kaiser filter + + // update phase and state based on the new filter. + const Constants& c(mConstants); + mInBuffer.resize(mChannelCount, c.mHalfNumCoefs); + const uint32_t phaseWrapLimit = c.mL << c.mShift; + // try to preserve as much of the phase fraction as possible for on-the-fly changes + mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction) + * phaseWrapLimit / oldPhaseWrapLimit; + mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case. + mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit) + * inSampleRate / mSampleRate); + + // determine which resampler to use + // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits") + int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0; + int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2; + if (locked) { + mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase + } + if (!USE_NEON) { + stride = 2; // C version only + } + // TODO: Remove this for testing + //stride = 2; + mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32); +#ifdef DEBUG_RESAMPLER + printf("channels:%d %s stride:%d %s coef:%d shift:%d\n", + mChannelCount, locked ? "locked" : "interpolated", + stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift); +#endif +} + +void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount, + AudioBufferProvider* provider) +{ + // TODO: + // 24 cases - this perhaps can be reduced later, as testing might take too long + switch (mResampleType) { + + // stride 16 (stride 2 for machines that do not support NEON) + case RESAMPLETYPE(1, true, 16, 0): + return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, true, 16, 0): + return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, false, 16, 0): + return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, false, 16, 0): + return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, true, 16, 1): + return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, true, 16, 1): + return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(1, false, 16, 1): + return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, false, 16, 1): + return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); +#if 0 + // TODO: Remove these? + // stride 8 + case RESAMPLETYPE(1, true, 8, 0): + return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, true, 8, 0): + return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, false, 8, 0): + return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, false, 8, 0): + return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, true, 8, 1): + return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, true, 8, 1): + return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(1, false, 8, 1): + return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, false, 8, 1): + return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + // stride 2 (can handle any filter length) + case RESAMPLETYPE(1, true, 2, 0): + return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, true, 2, 0): + return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, false, 2, 0): + return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, false, 2, 0): + return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, true, 2, 1): + return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, true, 2, 1): + return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(1, false, 2, 1): + return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, false, 2, 1): + return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); +#endif + default: + ; // error + } +} + +template<int CHANNELS, bool LOCKED, int STRIDE, typename TC> +void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount, + const TC* const coefs, AudioBufferProvider* provider) +{ + const Constants& c(mConstants); + int16_t* impulse = mInBuffer.getImpulse(); + size_t inputIndex = mInputIndex; + uint32_t phaseFraction = mPhaseFraction; + const uint32_t phaseIncrement = mPhaseIncrement; + size_t outputIndex = 0; + size_t outputSampleCount = outFrameCount * 2; // stereo output + size_t inFrameCount = (outFrameCount*mInSampleRate)/mSampleRate; + const uint32_t phaseWrapLimit = c.mL << c.mShift; + + // NOTE: be very careful when modifying the code here. register + // pressure is very high and a small change might cause the compiler + // to generate far less efficient code. + // Always sanity check the result with objdump or test-resample. + + // the following logic is a bit convoluted to keep the main processing loop + // as tight as possible with register allocation. + while (outputIndex < outputSampleCount) { + // buffer is empty, fetch a new one + while (mBuffer.frameCount == 0) { + mBuffer.frameCount = inFrameCount; + provider->getNextBuffer(&mBuffer, + calculateOutputPTS(outputIndex / 2)); + if (mBuffer.raw == NULL) { + goto resample_exit; + } + if (phaseFraction >= phaseWrapLimit) { // read in data + mInBuffer.readAdvance<CHANNELS>( + impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex); + phaseFraction -= phaseWrapLimit; + while (phaseFraction >= phaseWrapLimit) { + inputIndex++; + if (inputIndex >= mBuffer.frameCount) { + inputIndex -= mBuffer.frameCount; + provider->releaseBuffer(&mBuffer); + break; + } + mInBuffer.readAdvance<CHANNELS>( + impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex); + phaseFraction -= phaseWrapLimit; + } + } + } + const int16_t* const in = mBuffer.i16; + const size_t frameCount = mBuffer.frameCount; + const int coefShift = c.mShift; + const int halfNumCoefs = c.mHalfNumCoefs; + const int32_t* const volumeSimd = mVolumeSimd; + + // reread the last input in. + mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex); + + // main processing loop + while (CC_LIKELY(outputIndex < outputSampleCount)) { + // caution: fir() is inlined and may be large. + // output will be loaded with the appropriate values + // + // from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs] + // from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs. + // + fir<CHANNELS, LOCKED, STRIDE>( + &out[outputIndex], + phaseFraction, phaseWrapLimit, + coefShift, halfNumCoefs, coefs, + impulse, volumeSimd); + outputIndex += 2; + + phaseFraction += phaseIncrement; + while (phaseFraction >= phaseWrapLimit) { + inputIndex++; + if (inputIndex >= frameCount) { + goto done; // need a new buffer + } + mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex); + phaseFraction -= phaseWrapLimit; + } + } +done: + // often arrives here when input buffer runs out + if (inputIndex >= frameCount) { + inputIndex -= frameCount; + provider->releaseBuffer(&mBuffer); + // mBuffer.frameCount MUST be zero here. + } + } + +resample_exit: + mInBuffer.setImpulse(impulse); + mInputIndex = inputIndex; + mPhaseFraction = phaseFraction; +} + +// ---------------------------------------------------------------------------- +}; // namespace android diff --git a/services/audioflinger/AudioResamplerDyn.h b/services/audioflinger/AudioResamplerDyn.h new file mode 100644 index 0000000..df1fdbe --- /dev/null +++ b/services/audioflinger/AudioResamplerDyn.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_DYN_H +#define ANDROID_AUDIO_RESAMPLER_DYN_H + +#include <stdint.h> +#include <sys/types.h> +#include <cutils/log.h> + +#include "AudioResampler.h" + +namespace android { + +class AudioResamplerDyn: public AudioResampler { +public: + AudioResamplerDyn(int bitDepth, int inChannelCount, int32_t sampleRate, + src_quality quality); + + virtual ~AudioResamplerDyn(); + + virtual void init(); + + virtual void setSampleRate(int32_t inSampleRate); + + virtual void setVolume(int16_t left, int16_t right); + + virtual void resample(int32_t* out, size_t outFrameCount, + AudioBufferProvider* provider); + +private: + + class Constants { // stores the filter constants. + public: + Constants() : + mL(0), mShift(0), mHalfNumCoefs(0), mFirCoefsS16(NULL) + {} + void set(int L, int halfNumCoefs, + int inSampleRate, int outSampleRate); + inline void setBuf(int16_t* buf) { + mFirCoefsS16 = buf; + } + inline void setBuf(int32_t* buf) { + mFirCoefsS32 = buf; + } + + int mL; // interpolation phases in the filter. + int mShift; // right shift to get polyphase index + unsigned int mHalfNumCoefs; // filter half #coefs + union { // polyphase filter bank + const int16_t* mFirCoefsS16; + const int32_t* mFirCoefsS32; + }; + }; + + // Input buffer management for a given input type TI, now (int16_t) + // Is agnostic of the actual type, can work with int32_t and float. + template<typename TI> + class InBuffer { + public: + InBuffer(); + ~InBuffer(); + void init(); + void resize(int CHANNELS, int halfNumCoefs); + + // used for direct management of the mImpulse pointer + inline TI* getImpulse() { + return mImpulse; + } + inline void setImpulse(TI *impulse) { + mImpulse = impulse; + } + template<int CHANNELS> + inline void readAgain(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex); + template<int CHANNELS> + inline void readAdvance(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex); + + private: + // tuning parameter guidelines: 2 <= multiple <= 8 + static const int kStateSizeMultipleOfFilterLength = 4; + + TI* mState; // base pointer for the input buffer storage + TI* mImpulse; // current location of the impulse response (centered) + TI* mRingFull; // mState <= mImpulse < mRingFull + // in general, mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS. + size_t mStateSize; // in units of TI. + }; + + template<int CHANNELS, bool LOCKED, int STRIDE, typename TC> + void resample(int32_t* out, size_t outFrameCount, + const TC* const coefs, AudioBufferProvider* provider); + + template<typename T> + void createKaiserFir(Constants &c, double stopBandAtten, + int inSampleRate, int outSampleRate, double tbwCheat); + + InBuffer<int16_t> mInBuffer; + Constants mConstants; // current set of coefficient parameters + int32_t __attribute__ ((aligned (8))) mVolumeSimd[2]; + int32_t mResampleType; // contains the resample type. + int32_t mFilterSampleRate; // designed filter sample rate. + src_quality mFilterQuality; // designed filter quality. + void* mCoefBuffer; // if a filter is created, this is not null +}; + +// ---------------------------------------------------------------------------- +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_DYN_H*/ diff --git a/services/audioflinger/AudioResamplerFirGen.h b/services/audioflinger/AudioResamplerFirGen.h new file mode 100644 index 0000000..fac3001 --- /dev/null +++ b/services/audioflinger/AudioResamplerFirGen.h @@ -0,0 +1,684 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H +#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H + +namespace android { + +/* + * generates a sine wave at equal steps. + * + * As most of our functions use sine or cosine at equal steps, + * it is very efficient to compute them that way (single multiply and subtract), + * rather than invoking the math library sin() or cos() each time. + * + * SineGen uses Goertzel's Algorithm (as a generator not a filter) + * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep) + * by stepping through 0, 1, ... n. + * + * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep) + * + * or looking at just the imaginary sine term, as the cosine follows identically: + * + * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep) + * + * Goertzel's algorithm is more efficient than the angle addition formula, + * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to + * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and + * cosine generation due to the complex * complex multiply (full rotation). + * + * See: http://en.wikipedia.org/wiki/Goertzel_algorithm + * + */ + +class SineGen { +public: + SineGen(double wstart, double wstep, bool cosine = false) { + if (cosine) { + mCurrent = cos(wstart); + mPrevious = cos(wstart - wstep); + } else { + mCurrent = sin(wstart); + mPrevious = sin(wstart - wstep); + } + mTwoCos = 2.*cos(wstep); + } + SineGen(double expNow, double expPrev, double twoCosStep) { + mCurrent = expNow; + mPrevious = expPrev; + mTwoCos = twoCosStep; + } + inline double value() const { + return mCurrent; + } + inline void advance() { + double tmp = mCurrent; + mCurrent = mCurrent*mTwoCos - mPrevious; + mPrevious = tmp; + } + inline double valueAdvance() { + double tmp = mCurrent; + mCurrent = mCurrent*mTwoCos - mPrevious; + mPrevious = tmp; + return tmp; + } + +private: + double mCurrent; // current value of sine/cosine + double mPrevious; // previous value of sine/cosine + double mTwoCos; // stepping factor +}; + +/* + * generates a series of sine generators, phase offset by fixed steps. + * + * This is used to generate polyphase sine generators, one per polyphase + * in the filter code below. + * + * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep; + * increments by innerStep. + * + */ + +class SineGenGen { +public: + SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false) + : mSineInnerCur(outerStart, outerStep, cosine), + mSineInnerPrev(outerStart-innerStep, outerStep, cosine) + { + mTwoCos = 2.*cos(innerStep); + } + inline SineGen value() { + return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos); + } + inline void advance() { + mSineInnerCur.advance(); + mSineInnerPrev.advance(); + } + inline SineGen valueAdvance() { + return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos); + } + +private: + SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep). + SineGen mSineInnerPrev; // generate the inner sine previous values + // (behind by innerStep, stepped by outerStep). + double mTwoCos; // the inner stepping factor for the returned SineGen. +}; + +static inline double sqr(double x) { + return x * x; +} + +/* + * rounds a double to the nearest integer for FIR coefficients. + * + * One variant uses noise shaping, which must keep error history + * to work (the err parameter, initialized to 0). + * The other variant is a non-noise shaped version for + * S32 coefficients (noise shaping doesn't gain much). + * + * Caution: No bounds saturation is applied, but isn't needed in this case. + * + * @param x is the value to round. + * + * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom). + * Typically this may be the maximum positive integer+1 (using the fact that double precision + * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition). + * + * @param err is the previous error (actual - rounded) for the previous rounding op. + * For 16b coefficients this can improve stopband dB performance by up to 2dB. + * + * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping + * + */ + +static inline int64_t toint(double x, int64_t maxval, double& err) { + double val = x * maxval; + double ival = floor(val + 0.5 + err*0.2); + err = val - ival; + return static_cast<int64_t>(ival); +} + +static inline int64_t toint(double x, int64_t maxval) { + return static_cast<int64_t>(floor(x * maxval + 0.5)); +} + +/* + * Modified Bessel function of the first kind + * http://en.wikipedia.org/wiki/Bessel_function + * + * The formulas are taken from Abramowitz and Stegun, + * _Handbook of Mathematical Functions_ (links below): + * + * http://people.math.sfu.ca/~cbm/aands/page_375.htm + * http://people.math.sfu.ca/~cbm/aands/page_378.htm + * + * http://dlmf.nist.gov/10.25 + * http://dlmf.nist.gov/10.40 + * + * Note we assume x is nonnegative (the function is symmetric, + * pass in the absolute value as needed). + * + * Constants are compile time derived with templates I0Term<> and + * I0ATerm<> to the precision of the compiler. The series can be expanded + * to any precision needed, but currently set around 24b precision. + * + * We use a bit of template math here, constexpr would probably be + * more appropriate for a C++11 compiler. + * + * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit. + * + */ + +template <int N> +struct I0Term { + static const double value = I0Term<N-1>::value / (4. * N * N); +}; + +template <> +struct I0Term<0> { + static const double value = 1.; +}; + +template <int N> +struct I0ATerm { + static const double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N); +}; + +template <> +struct I0ATerm<0> { // 1/sqrt(2*PI); + static const double value = 0.398942280401432677939946059934381868475858631164934657665925; +}; + +#if USE_HORNERS_METHOD +/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... + * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method + * + * This has fewer multiplications than Estrin's method below, but has back to back + * floating point dependencies. + * + * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled. + */ + +inline double Poly2(double A, double B, double x) { + return A + x * B; +} + +inline double Poly4(double A, double B, double C, double D, double x) { + return A + x * (B + x * (C + x * (D))); +} + +inline double Poly7(double A, double B, double C, double D, double E, double F, double G, + double x) { + return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G)))))); +} + +inline double Poly9(double A, double B, double C, double D, double E, double F, double G, + double H, double I, double x) { + return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I)))))))); +} + +#else +/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... + * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme + * + * This is typically faster, perhaps gains about 5-10% overall on ARM processors + * over Horner's method above. + */ + +inline double Poly2(double A, double B, double x) { + return A + B * x; +} + +inline double Poly3(double A, double B, double C, double x, double x2) { + return Poly2(A, B, x) + C * x2; +} + +inline double Poly3(double A, double B, double C, double x) { + return Poly2(A, B, x) + C * x * x; +} + +inline double Poly4(double A, double B, double C, double D, double x, double x2) { + return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2); +} + +inline double Poly4(double A, double B, double C, double D, double x) { + return Poly4(A, B, C, D, x, x * x); +} + +inline double Poly7(double A, double B, double C, double D, double E, double F, double G, + double x) { + double x2 = x * x; + return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2); +} + +inline double Poly8(double A, double B, double C, double D, double E, double F, double G, + double H, double x, double x2, double x4) { + return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4; +} + +inline double Poly9(double A, double B, double C, double D, double E, double F, double G, + double H, double I, double x) { + double x2 = x * x; +#if 1 + // It does not seem faster to explicitly decompose Poly8 into Poly4, but + // could depend on compiler floating point scheduling. + double x4 = x2 * x2; + return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4); +#else + double val = Poly4(A, B, C, D, x, x2); + double x4 = x2 * x2; + return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4); +#endif +} +#endif + +static inline double I0(double x) { + if (x < 3.75) { + x *= x; + return Poly7(I0Term<0>::value, I0Term<1>::value, + I0Term<2>::value, I0Term<3>::value, + I0Term<4>::value, I0Term<5>::value, + I0Term<6>::value, x); // e < 1.6e-7 + } + if (1) { + /* + * Series expansion coefs are easy to calculate, but are expanded around 0, + * so error is unequal over the interval 0 < x < 3.75, the error being + * significantly better near 0. + * + * A better solution is to use precise minimax polynomial fits. + * + * We use a slightly more complicated solution for 3.75 < x < 15, based on + * the tables in Blair and Edwards, "Stable Rational Minimax Approximations + * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory, + * AECL-4928. + * + * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf + * + * See Table 11 for 0 < x < 15; e < 10^(-7.13). + * + * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b). + * + * This speeds up overall computation by about 40% over using the else clause below, + * which requires sqrt and exp. + * + */ + + x *= x; + double num = Poly9(-0.13544938430e9, -0.33153754512e8, + -0.19406631946e7, -0.48058318783e5, + -0.63269783360e3, -0.49520779070e1, + -0.24970910370e-1, -0.74741159550e-4, + -0.18257612460e-6, x); + double y = x - 225.; // reflection around 15 (squared) + double den = Poly4(-0.34598737196e8, 0.23852643181e6, + -0.70699387620e3, 0.10000000000e1, y); + return num / den; + +#if IO_EXTENDED_BETA + /* Table 42 for x > 15; e < 10^(-8.11). + * This is used for Beta>15, but is disabled here as + * we never use Beta that high. + * + * NOTE: This should be enabled only for x > 15. + */ + + double y = 1./x; + double z = y - (1./15); + double num = Poly2(0.415079861746e1, -0.5149092496e1, z); + double den = Poly3(0.103150763823e2, -0.14181687413e2, + 0.1000000000e1, z); + return exp(x) * sqrt(y) * num / den; +#endif + } else { + /* + * NOT USED, but reference for large Beta. + * + * Abramowitz and Stegun asymptotic formula. + * works for x > 3.75. + */ + double y = 1./x; + return exp(x) * sqrt(y) * + // note: reciprocal squareroot may be easier! + // http://en.wikipedia.org/wiki/Fast_inverse_square_root + Poly9(I0ATerm<0>::value, I0ATerm<1>::value, + I0ATerm<2>::value, I0ATerm<3>::value, + I0ATerm<4>::value, I0ATerm<5>::value, + I0ATerm<6>::value, I0ATerm<7>::value, + I0ATerm<8>::value, y); // (... e) < 1.9e-7 + } +} + +/* + * calculates the transition bandwidth for a Kaiser filter + * + * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 + * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 + * + * @param halfNumCoef is half the number of coefficients per filter phase. + * + * @param stopBandAtten is the stop band attenuation desired. + * + * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5) + */ +static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) { + return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef); +} + +/* + * calculates the fir transfer response of the overall polyphase filter at w. + * + * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the + * fact that h[n] is symmetric (cosines only, no complex arithmetic). + * + * We use Goertzel's algorithm to accelerate the computation to essentially + * a single multiply and 2 adds per filter coefficient h[]. + * + * Be careful be careful to consider that h[n] is the overall polyphase filter, + * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain", + * as you only use one of the polyphases at a time. + */ +template <typename T> +static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) { + double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank + coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank). +#if SLOW_FIRTRANSFER + /* Original code for reference. This is equivalent to the code below, but slower. */ + for (int i=1 ; i<=L ; ++i) { + for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { + accum += cos(ix*w)*static_cast<double>(*coef++); + } + } +#else + /* + * Our overall filter is stored striped by polyphases, not a contiguous h[n]. + * We could fetch coefficients in a non-contiguous fashion + * but that will not scale to vector processing. + * + * We apply Goertzel's algorithm directly to each polyphase filter bank instead of + * using cosine generation/multiplication, thereby saving one multiply per inner loop. + * + * See: http://en.wikipedia.org/wiki/Goertzel_algorithm + * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720. + * + * We use the basic recursion to incorporate the cosine steps into real sequence x[n]: + * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2] + * + * y[n] = s[n] - e^(iw)s[n-1] + * = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k)) + * = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk) + * + * The summation contains the frequency steps we want multiplied by the source + * (similar to a DTFT). + * + * Using symmetry, and just the real part (be careful, this must happen + * after any internal complex multiplications), the polyphase filterbank + * transfer function is: + * + * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0) + * = Re{ e^(iwn + iw_0) y[n]} + * = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1] + * + * using the fact that s[n] of real x[n] is real. + * + */ + double dcos = 2. * cos(L*w); + int start = ((halfNumCoef)*L + 1); + SineGen cc((start - L) * w, w, true); // cosine + SineGen cp(start * w, w, true); // cosine + for (int i=1 ; i<=L ; ++i) { + double sc = 0; + double sp = 0; + for (int j=0 ; j<halfNumCoef ; ++j) { + double tmp = sc; + sc = static_cast<double>(*coef++) + dcos*sc - sp; + sp = tmp; + } + // If we are awfully clever, we can apply Goertzel's algorithm + // again on the sc and sp sequences returned here. + accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp; + } +#endif + return accum*2.; +} + +/* + * evaluates the minimum and maximum |H(f)| bound in a band region. + * + * This is usually done with equally spaced increments in the target band in question. + * The passband is often very small, and sampled that way. The stopband is often much + * larger. + * + * We use the fact that the overall polyphase filter has an additional bank at the end + * for interpolation; hence it is overspecified for the H(f) computation. Thus the + * first polyphase is never actually checked, excepting its first term. + * + * In this code we use the firTransfer() evaluator above, which uses Goertzel's + * algorithm to calculate the transfer function at each point. + * + * TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal + * spacing is a chirp transform. + * + * @param coef is the designed polyphase filter banks + * + * @param L is the number of phases (for interpolation) + * + * @param halfNumCoef should be half the number of coefficients for a single + * polyphase. + * + * @param fstart is the normalized frequency start. + * + * @param fend is the normalized frequency end. + * + * @param steps is the number of steps to take (sampling) between frequency start and end + * + * @param firMin returns the minimum transfer |H(f)| found + * + * @param firMax returns the maximum transfer |H(f)| found + * + * 0 <= f <= 0.5. + * This is used to test passband and stopband performance. + */ +template <typename T> +static void testFir(const T* coef, int L, int halfNumCoef, + double fstart, double fend, int steps, double &firMin, double &firMax) { + double wstart = fstart*(2.*M_PI); + double wend = fend*(2.*M_PI); + double wstep = (wend - wstart)/steps; + double fmax, fmin; + double trf = firTransfer(coef, L, halfNumCoef, wstart); + if (trf<0) { + trf = -trf; + } + fmin = fmax = trf; + wstart += wstep; + for (int i=1; i<steps; ++i) { + trf = firTransfer(coef, L, halfNumCoef, wstart); + if (trf<0) { + trf = -trf; + } + if (trf>fmax) { + fmax = trf; + } + else if (trf<fmin) { + fmin = trf; + } + wstart += wstep; + } + // renormalize - this is only needed for integer filter types + double norm = 1./((1ULL<<(sizeof(T)*8-1))*L); + + firMin = fmin * norm; + firMax = fmax * norm; +} + +/* + * evaluates the |H(f)| lowpass band characteristics. + * + * This function tests the lowpass characteristics for the overall polyphase filter, + * and is used to verify the design. For this case, fp should be set to the + * passband normalized frequency from 0 to 0.5 for the overall filter (thus it + * is the designed polyphase bank value / L). Likewise for fs. + * + * @param coef is the designed polyphase filter banks + * + * @param L is the number of phases (for interpolation) + * + * @param halfNumCoef should be half the number of coefficients for a single + * polyphase. + * + * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5. + * + * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5. + * + * @param passSteps is the number of passband sampling steps. + * + * @param stopSteps is the number of stopband sampling steps. + * + * @param passMin is the minimum value in the passband + * + * @param passMax is the maximum value in the passband (useful for scaling). This should + * be less than 1., to avoid sine wave test overflow. + * + * @param passRipple is the passband ripple. Typically this should be less than 0.1 for + * an audio filter. Generally speaker/headphone device characteristics will dominate + * the passband term. + * + * @param stopMax is the maximum value in the stopband. + * + * @param stopRipple is the stopband ripple, also known as stopband attenuation. + * Typically this should be greater than ~80dB for low quality, and greater than + * ~100dB for full 16b quality, otherwise aliasing may become noticeable. + * + */ +template <typename T> +static void testFir(const T* coef, int L, int halfNumCoef, + double fp, double fs, int passSteps, int stopSteps, + double &passMin, double &passMax, double &passRipple, + double &stopMax, double &stopRipple) { + double fmin, fmax; + testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax); + double d1 = (fmax - fmin)/2.; + passMin = fmin; + passMax = fmax; + passRipple = -20.*log10(1. - d1); // passband ripple + testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax); + // fmin is really not important for the stopband. + stopMax = fmax; + stopRipple = -20.*log10(fmax); // stopband ripple/attenuation +} + +/* + * Calculates the overall polyphase filter based on a windowed sinc function. + * + * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1 + * taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks. + * The last filterbank is used for interpolation purposes (and is mostly composed + * of the first bank shifted by one sample), and is unnecessary if one does + * not do interpolation. + * + * We use the last filterbank for some transfer function calculation purposes, + * so it needs to be generated anyways. + * + * @param coef is the caller allocated space for coefficients. This should be + * exactly (L+1)*halfNumCoef in size. + * + * @param L is the number of phases (for interpolation) + * + * @param halfNumCoef should be half the number of coefficients for a single + * polyphase. + * + * @param stopBandAtten is the stopband value, should be >50dB. + * + * @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy + * should be 6dB less. (fcr is where the amplitude drops by half). Use the + * firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint + * between the stop band and the pass band (fstop+fpass)/2. + * + * @param atten is the attenuation (generally slightly less than 1). + */ + +template <typename T> +static inline void firKaiserGen(T* coef, int L, int halfNumCoef, + double stopBandAtten, double fcr, double atten) { + // + // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 + // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 + // + // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf + // + // Kaiser window and beta parameter + // + // | 0.1102*(A - 8.7) A > 50 + // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50 + // | 0. A < 21 + // + // with A is the desired stop-band attenuation in dBFS + // + // 30 dB 2.210 + // 40 dB 3.384 + // 50 dB 4.538 + // 60 dB 5.658 + // 70 dB 6.764 + // 80 dB 7.865 + // 90 dB 8.960 + // 100 dB 10.056 + + const int N = L * halfNumCoef; // non-negative half + const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always + const double xstep = (2. * M_PI) * fcr / L; + const double xfrac = 1. / N; + const double yscale = atten * L / (I0(beta) * M_PI); + + // We use sine generators, which computes sines on regular step intervals. + // This speeds up overall computation about 40% from computing the sine directly. + + SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase) + + for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation + + // computation for a single polyphase of the overall filter. + SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop. + double err = 0; // for noise shaping on int16_t coefficients (over each polyphase) + + for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { + double y; + if (CC_LIKELY(ix)) { + double x = static_cast<double>(ix); + + // sine generator: sg.valueAdvance() returns sin(ix*xstep); + y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x; + } else { + y = 2. * atten * fcr; // center of filter, sinc(0) = 1. + sg.advance(); + } + + // (caution!) float version does not need rounding + if (is_same<T, int16_t>::value) { // int16_t needs noise shaping + *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err)); + } else { + *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1))); + } + } + } +} + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/ diff --git a/services/audioflinger/AudioResamplerFirOps.h b/services/audioflinger/AudioResamplerFirOps.h new file mode 100644 index 0000000..bf2163f --- /dev/null +++ b/services/audioflinger/AudioResamplerFirOps.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_OPS_H +#define ANDROID_AUDIO_RESAMPLER_FIR_OPS_H + +namespace android { + +#if defined(__arm__) && !defined(__thumb__) +#define USE_INLINE_ASSEMBLY (true) +#else +#define USE_INLINE_ASSEMBLY (false) +#endif + +#if USE_INLINE_ASSEMBLY && defined(__ARM_NEON__) +#define USE_NEON (true) +#include <arm_neon.h> +#else +#define USE_NEON (false) +#endif + +template<typename T, typename U> +struct is_same +{ + static const bool value = false; +}; + +template<typename T> +struct is_same<T, T> // partial specialization +{ + static const bool value = true; +}; + +static inline +int32_t mulRL(int left, int32_t in, uint32_t vRL) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + if (left) { + asm( "smultb %[out], %[in], %[vRL] \n" + : [out]"=r"(out) + : [in]"%r"(in), [vRL]"r"(vRL) + : ); + } else { + asm( "smultt %[out], %[in], %[vRL] \n" + : [out]"=r"(out) + : [in]"%r"(in), [vRL]"r"(vRL) + : ); + } + return out; +#else + int16_t v = left ? static_cast<int16_t>(vRL) : static_cast<int16_t>(vRL>>16); + return static_cast<int32_t>((static_cast<int64_t>(in) * v) >> 16); +#endif +} + +static inline +int32_t mulAdd(int16_t in, int16_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + asm( "smlabb %[out], %[v], %[in], %[a] \n" + : [out]"=r"(out) + : [in]"%r"(in), [v]"r"(v), [a]"r"(a) + : ); + return out; +#else + return a + v * in; +#endif +} + +static inline +int32_t mulAdd(int16_t in, int32_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + asm( "smlawb %[out], %[v], %[in], %[a] \n" + : [out]"=r"(out) + : [in]"%r"(in), [v]"r"(v), [a]"r"(a) + : ); + return out; +#else + return a + static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 16); +#endif +} + +static inline +int32_t mulAdd(int32_t in, int32_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + asm( "smmla %[out], %[v], %[in], %[a] \n" + : [out]"=r"(out) + : [in]"%r"(in), [v]"r"(v), [a]"r"(a) + : ); + return out; +#else + return a + static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 32); +#endif +} + +static inline +int32_t mulAddRL(int left, uint32_t inRL, int16_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + if (left) { + asm( "smlabb %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } else { + asm( "smlabt %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } + return out; +#else + int16_t s = left ? static_cast<int16_t>(inRL) : static_cast<int16_t>(inRL>>16); + return a + v * s; +#endif +} + +static inline +int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + if (left) { + asm( "smlawb %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } else { + asm( "smlawt %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } + return out; +#else + int16_t s = left ? static_cast<int16_t>(inRL) : static_cast<int16_t>(inRL>>16); + return a + static_cast<int32_t>((static_cast<int64_t>(v) * s) >> 16); +#endif +} + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_OPS_H*/ diff --git a/services/audioflinger/AudioResamplerFirProcess.h b/services/audioflinger/AudioResamplerFirProcess.h new file mode 100644 index 0000000..38e387c --- /dev/null +++ b/services/audioflinger/AudioResamplerFirProcess.h @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H +#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H + +namespace android { + +// depends on AudioResamplerFirOps.h + +template<int CHANNELS, typename TC> +static inline +void mac( + int32_t& l, int32_t& r, + const TC coef, + const int16_t* samples) +{ + if (CHANNELS == 2) { + uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); + l = mulAddRL(1, rl, coef, l); + r = mulAddRL(0, rl, coef, r); + } else { + r = l = mulAdd(samples[0], coef, l); + } +} + +template<int CHANNELS, typename TC> +static inline +void interpolate( + int32_t& l, int32_t& r, + const TC coef_0, const TC coef_1, + const int16_t lerp, const int16_t* samples) +{ + TC sinc; + + if (is_same<TC, int16_t>::value) { + sinc = (lerp * ((coef_1-coef_0)<<1)>>16) + coef_0; + } else { + sinc = mulAdd(lerp, (coef_1-coef_0)<<1, coef_0); + } + if (CHANNELS == 2) { + uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); + l = mulAddRL(1, rl, sinc, l); + r = mulAddRL(0, rl, sinc, r); + } else { + r = l = mulAdd(samples[0], sinc, l); + } +} + +/* + * Calculates a single output sample (two stereo frames). + * + * This function computes both the positive half FIR dot product and + * the negative half FIR dot product, accumulates, and then applies the volume. + * + * This is a locked phase filter (it does not compute the interpolation). + * + * Use fir() to compute the proper coefficient pointers for a polyphase + * filter bank. + */ + +template <int CHANNELS, int STRIDE, typename TC> +static inline +void ProcessL(int32_t* const out, + int count, + const TC* coefsP, + const TC* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + int32_t l = 0; + int32_t r = 0; + do { + mac<CHANNELS>(l, r, *coefsP++, sP); + sP -= CHANNELS; + mac<CHANNELS>(l, r, *coefsN++, sN); + sN += CHANNELS; + } while (--count > 0); + out[0] += 2 * mulRL(0, l, volumeLR[0]); // Note: only use top 16b + out[1] += 2 * mulRL(0, r, volumeLR[1]); // Note: only use top 16b +} + +/* + * Calculates a single output sample (two stereo frames) interpolating phase. + * + * This function computes both the positive half FIR dot product and + * the negative half FIR dot product, accumulates, and then applies the volume. + * + * This is an interpolated phase filter. + * + * Use fir() to compute the proper coefficient pointers for a polyphase + * filter bank. + */ + +template <int CHANNELS, int STRIDE, typename TC> +static inline +void Process(int32_t* const out, + int count, + const TC* coefsP, + const TC* coefsN, + const TC* coefsP1, + const TC* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + (void) coefsP1; // suppress unused parameter warning + (void) coefsN1; + if (sizeof(*coefsP)==4) { + lerpP >>= 16; // ensure lerpP is 16b + } + int32_t l = 0; + int32_t r = 0; + for (size_t i = 0; i < count; ++i) { + interpolate<CHANNELS>(l, r, coefsP[0], coefsP[count], lerpP, sP); + coefsP++; + sP -= CHANNELS; + interpolate<CHANNELS>(l, r, coefsN[count], coefsN[0], lerpP, sN); + coefsN++; + sN += CHANNELS; + } + out[0] += 2 * mulRL(0, l, volumeLR[0]); // Note: only use top 16b + out[1] += 2 * mulRL(0, r, volumeLR[1]); // Note: only use top 16b +} + +/* + * Calculates a single output sample (two stereo frames) from input sample pointer. + * + * This sets up the params for the accelerated Process() and ProcessL() + * functions to do the appropriate dot products. + * + * @param out should point to the output buffer with at least enough space for 2 output frames. + * + * @param phase is the fractional distance between input samples for interpolation: + * phase >= 0 && phase < phaseWrapLimit. It can be thought of as a rational fraction + * of phase/phaseWrapLimit. + * + * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases + * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift). + * + * @param coefShift gives the bit alignment of the polyphase index in the phase parameter. + * + * @param halfNumCoefs is the half the number of coefficients per polyphase filter. Since the + * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored. + * + * @param coefs is the polyphase filter bank, starting at from polyphase index 0, and ranging to + * and including the #polyphases. Each polyphase of the filter has half-length halfNumCoefs + * (due to symmetry). The total size of the filter bank in coefficients is + * (#polyphases+1)*halfNumCoefs. + * + * The filter bank coefs should be aligned to a minimum of 16 bytes (preferrably to cache line). + * + * The coefs should be attenuated (to compensate for passband ripple) + * if storing back into the native format. + * + * @param samples are unaligned input samples. The position is in the "middle" of the + * sample array with respect to the FIR filter: + * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs; + * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1. + * + * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel, + * expressed as a S32 integer. A negative value inverts the channel 180 degrees. + * The pointer volumeLR should be aligned to a minimum of 8 bytes. + * A typical value for volume is 0x1000 to align to a unity gain output of 20.12. + * + * In between calls to filterCoefficient, the phase is incremented by phaseIncrement, where + * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling. + * + * The filter polyphase index is given by indexP = phase >> coefShift. Due to + * odd length symmetric filter, the polyphase index of the negative half depends on + * whether interpolation is used. + * + * The fractional siting between the polyphase indices is given by the bits below coefShift: + * + * lerpP = phase << 32 - coefShift >> 1; // for 32 bit unsigned phase multiply + * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply + * + * For integer types, this is expressed as: + * + * lerpP = phase << sizeof(phase)*8 - coefShift + * >> (sizeof(phase)-sizeof(*coefs))*8 + 1; + * + */ + +template<int CHANNELS, bool LOCKED, int STRIDE, typename TC> +static inline +void fir(int32_t* const out, + const uint32_t phase, const uint32_t phaseWrapLimit, + const int coefShift, const int halfNumCoefs, const TC* const coefs, + const int16_t* const samples, const int32_t* const volumeLR) +{ + // NOTE: be very careful when modifying the code here. register + // pressure is very high and a small change might cause the compiler + // to generate far less efficient code. + // Always sanity check the result with objdump or test-resample. + + if (LOCKED) { + // locked polyphase (no interpolation) + // Compute the polyphase filter index on the positive and negative side. + uint32_t indexP = phase >> coefShift; + uint32_t indexN = (phaseWrapLimit - phase) >> coefShift; + const TC* coefsP = coefs + indexP*halfNumCoefs; + const TC* coefsN = coefs + indexN*halfNumCoefs; + const int16_t* sP = samples; + const int16_t* sN = samples + CHANNELS; + + // dot product filter. + ProcessL<CHANNELS, STRIDE>(out, + halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR); + } else { + // interpolated polyphase + // Compute the polyphase filter index on the positive and negative side. + uint32_t indexP = phase >> coefShift; + uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement. + const TC* coefsP = coefs + indexP*halfNumCoefs; + const TC* coefsN = coefs + indexN*halfNumCoefs; + const TC* coefsP1 = coefsP + halfNumCoefs; + const TC* coefsN1 = coefsN + halfNumCoefs; + const int16_t* sP = samples; + const int16_t* sN = samples + CHANNELS; + + // Interpolation fraction lerpP derived by shifting all the way up and down + // to clear the appropriate bits and align to the appropriate level + // for the integer multiply. The constants should resolve in compile time. + // + // The interpolated filter coefficient is derived as follows for the pos/neg half: + // + // interpolated[P] = index[P]*lerpP + index[P+1]*(1-lerpP) + // interpolated[N] = index[N+1]*lerpP + index[N]*(1-lerpP) + uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift) + >> ((sizeof(phase)-sizeof(*coefs))*8 + 1); + + // on-the-fly interpolated dot product filter + Process<CHANNELS, STRIDE>(out, + halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR); + } +} + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/ diff --git a/services/audioflinger/AudioResamplerFirProcessNeon.h b/services/audioflinger/AudioResamplerFirProcessNeon.h new file mode 100644 index 0000000..f311cef --- /dev/null +++ b/services/audioflinger/AudioResamplerFirProcessNeon.h @@ -0,0 +1,1149 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H +#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H + +namespace android { + +// depends on AudioResamplerFirOps.h, AudioResamplerFirProcess.h + +#if USE_NEON +// +// NEON specializations are enabled for Process() and ProcessL() +// +// TODO: Stride 16 and Stride 8 can be combined with one pass stride 8 (if necessary) +// and looping stride 16 (or vice versa). This has some polyphase coef data alignment +// issues with S16 coefs. Consider this later. + +// Macros to save a mono/stereo accumulator sample in q0 (and q4) as stereo out. +#define ASSEMBLY_ACCUMULATE_MONO \ + "vld1.s32 {d2}, [%[vLR]:64] \n"/* (1) load volumes */\ + "vld1.s32 {d3}, %[out] \n"/* (2) unaligned load the output */\ + "vpadd.s32 d0, d0, d1 \n"/* (1) add all 4 partial sums */\ + "vpadd.s32 d0, d0, d0 \n"/* (1+4d) and replicate L/R */\ + "vqrdmulh.s32 d0, d0, d2 \n"/* (2+3d) apply volume */\ + "vqadd.s32 d3, d3, d0 \n"/* (1+4d) accumulate result (saturating) */\ + "vst1.s32 {d3}, %[out] \n"/* (2+2d) store result */ + +#define ASSEMBLY_ACCUMULATE_STEREO \ + "vld1.s32 {d2}, [%[vLR]:64] \n"/* (1) load volumes*/\ + "vld1.s32 {d3}, %[out] \n"/* (2) unaligned load the output*/\ + "vpadd.s32 d0, d0, d1 \n"/* (1) add all 4 partial sums from q0*/\ + "vpadd.s32 d8, d8, d9 \n"/* (1) add all 4 partial sums from q4*/\ + "vpadd.s32 d0, d0, d8 \n"/* (1+4d) combine into L/R*/\ + "vqrdmulh.s32 d0, d0, d2 \n"/* (2+3d) apply volume*/\ + "vqadd.s32 d3, d3, d0 \n"/* (1+4d) accumulate result (saturating)*/\ + "vst1.s32 {d3}, %[out] \n"/* (2+2d)store result*/ + +template <> +inline void ProcessL<1, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// (2+0d) load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// (2) load 8 16-bits mono samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs + + "vrev64.16 q2, q2 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4 + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d17 \n"// (1+0d) multiply (reversed)samples by coef + "vmlal.s16 q0, d5, d16 \n"// (1) multiply (reversed)samples by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + "vmlal.s16 q0, d7, d21 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q10" + ); +} + +template <> +inline void ProcessL<2, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + "vrev64.16 q3, q3 \n"// (0 combines+) reverse right positive + + "vmlal.s16 q0, d4, d17 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q0, d5, d16 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q4, d6, d17 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q4, d7, d16 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q0, d10, d20 \n"// (1) multiply samples left + "vmlal.s16 q0, d11, d21 \n"// (1) multiply samples left + "vmlal.s16 q4, d12, d20 \n"// (1) multiply samples right + "vmlal.s16 q4, d13, d21 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #32 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q10" + ); +} + +template <> +inline void Process<1, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase S32 Q15 + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// (2+0d) load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// (2) load 8 16-bits mono samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q9}, [%[coefsP1]:128]! \n"// (1) load 8 16-bits coefs for interpolation + "vld1.16 {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q11}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs for interpolation + + "vsub.s16 q9, q9, q8 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 q11, q11, q10 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 q9, q9, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 q11, q11, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 q2, q2 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4 + + "vadd.s16 q8, q8, q9 \n"// (1+2d) interpolate (step3) 1st set + "vadd.s16 q10, q10, q11 \n"// (1+1d) interpolate (step3) 2nd set + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d17 \n"// (1+0d) multiply reversed samples by coef + "vmlal.s16 q0, d5, d16 \n"// (1) multiply reversed samples by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + "vmlal.s16 q0, d7, d21 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void Process<2, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q9}, [%[coefsP1]:128]! \n"// (1) load 8 16-bits coefs for interpolation + "vld1.16 {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q11}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs for interpolation + + "vsub.s16 q9, q9, q8 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 q11, q11, q10 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 q9, q9, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 q11, q11, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + "vrev64.16 q3, q3 \n"// (1) reverse 8 frames of the right positive + + "vadd.s16 q8, q8, q9 \n"// (1+1d) interpolate (step3) 1st set + "vadd.s16 q10, q10, q11 \n"// (1+1d) interpolate (step3) 2nd set + + "vmlal.s16 q0, d4, d17 \n"// (1) multiply reversed samples left + "vmlal.s16 q0, d5, d16 \n"// (1) multiply reversed samples left + "vmlal.s16 q4, d6, d17 \n"// (1) multiply reversed samples right + "vmlal.s16 q4, d7, d16 \n"// (1) multiply reversed samples right + "vmlal.s16 q0, d10, d20 \n"// (1) multiply samples left + "vmlal.s16 q0, d11, d21 \n"// (1) multiply samples left + "vmlal.s16 q4, d12, d20 \n"// (1) multiply samples right + "vmlal.s16 q4, d13, d21 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #32 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void ProcessL<1, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// load 8 16-bits mono samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// accumulate result + "vadd.s32 q0, q0, q13 \n"// accumulate result + + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + "subs %[count], %[count], #8 \n"// update loop counter + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void ProcessL<2, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + "vrev64.16 q3, q3 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d10, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d11, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// (+1) accumulate result + "vadd.s32 q0, q0, q13 \n"// (+1) accumulate result + + "vshll.s16 q12, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d7, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d12, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d13, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q4, q4, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// (+1) accumulate result + "vadd.s32 q4, q4, q13 \n"// (+1) accumulate result + + "subs %[count], %[count], #8 \n"// update loop counter + "sub %[sP], %[sP], #32 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void Process<1, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// load 8 16-bits mono samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q14, q15}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs + + "vsub.s32 q12, q12, q8 \n"// interpolate (step1) + "vsub.s32 q13, q13, q9 \n"// interpolate (step1) + "vsub.s32 q14, q14, q10 \n"// interpolate (step1) + "vsub.s32 q15, q15, q11 \n"// interpolate (step1) + + "vqrdmulh.s32 q12, q12, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q13, q13, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q14, q14, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q15, q15, d2[0] \n"// interpolate (step2) + + "vadd.s32 q8, q8, q12 \n"// interpolate (step3) + "vadd.s32 q9, q9, q13 \n"// interpolate (step3) + "vadd.s32 q10, q10, q14 \n"// interpolate (step3) + "vadd.s32 q11, q11, q15 \n"// interpolate (step3) + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// accumulate result + "vadd.s32 q0, q0, q13 \n"// accumulate result + + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + "subs %[count], %[count], #8 \n"// update loop counter + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void Process<2, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q14, q15}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs + + "vsub.s32 q12, q12, q8 \n"// interpolate (step1) + "vsub.s32 q13, q13, q9 \n"// interpolate (step1) + "vsub.s32 q14, q14, q10 \n"// interpolate (step1) + "vsub.s32 q15, q15, q11 \n"// interpolate (step1) + + "vqrdmulh.s32 q12, q12, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q13, q13, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q14, q14, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q15, q15, d2[0] \n"// interpolate (step2) + + "vadd.s32 q8, q8, q12 \n"// interpolate (step3) + "vadd.s32 q9, q9, q13 \n"// interpolate (step3) + "vadd.s32 q10, q10, q14 \n"// interpolate (step3) + "vadd.s32 q11, q11, q15 \n"// interpolate (step3) + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + "vrev64.16 q3, q3 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d10, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d11, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// (+1) accumulate result + "vadd.s32 q0, q0, q13 \n"// (+1) accumulate result + + "vshll.s16 q12, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d7, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d12, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d13, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q4, q4, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// (+1) accumulate result + "vadd.s32 q4, q4, q13 \n"// (+1) accumulate result + + "subs %[count], %[count], #8 \n"// update loop counter + "sub %[sP], %[sP], #32 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void ProcessL<1, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// (2+0d) load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// (2) load 4 16-bits mono samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 4 16-bits coefs + "vld1.16 {d20}, [%[coefsN0]:64]! \n"// (1) load 4 16-bits coefs + + "vrev64.16 d4, d4 \n"// (1) reversed s3, s2, s1, s0, s7, s6, s5, s4 + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed)samples by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #8 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q10" + ); +} + +template <> +inline void ProcessL<2, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {d4, d5}, [%[sP]] \n"// (2+0d) load 8 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// (2) load 8 16-bits stereo samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 8 16-bits coefs + "vld1.16 {d20}, [%[coefsN0]:64]! \n"// (1) load 8 16-bits coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + + "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q4, d5, d16 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q0, d6, d20 \n"// (1) multiply samples left + "vmlal.s16 q4, d7, d20 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q10" + ); +} + +template <> +inline void Process<1, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase S32 Q15 + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// (2+0d) load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// (2) load 4 16-bits mono samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 4 16-bits coefs + "vld1.16 {d17}, [%[coefsP1]:64]! \n"// (1) load 4 16-bits coefs for interpolation + "vld1.16 {d20}, [%[coefsN1]:64]! \n"// (1) load 4 16-bits coefs + "vld1.16 {d21}, [%[coefsN0]:64]! \n"// (1) load 4 16-bits coefs for interpolation + + "vsub.s16 d17, d17, d16 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 d21, d21, d20 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 d17, d17, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 d21, d21, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 d4, d4 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4 + + "vadd.s16 d16, d16, d17 \n"// (1+2d) interpolate (step3) 1st set + "vadd.s16 d20, d20, d21 \n"// (1+1d) interpolate (step3) 2nd set + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d16 \n"// (1+0d) multiply (reversed)by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void Process<2, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {d4, d5}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 8 16-bits coefs + "vld1.16 {d17}, [%[coefsP1]:64]! \n"// (1) load 8 16-bits coefs for interpolation + "vld1.16 {d20}, [%[coefsN1]:64]! \n"// (1) load 8 16-bits coefs + "vld1.16 {d21}, [%[coefsN0]:64]! \n"// (1) load 8 16-bits coefs for interpolation + + "vsub.s16 d17, d17, d16 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 d21, d21, d20 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 d17, d17, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 d21, d21, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + + "vadd.s16 d16, d16, d17 \n"// (1+1d) interpolate (step3) 1st set + "vadd.s16 d20, d20, d21 \n"// (1+1d) interpolate (step3) 2nd set + + "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q4, d5, d16 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q0, d6, d20 \n"// (1) multiply samples left + "vmlal.s16 q4, d7, d20 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void ProcessL<1, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// load 4 16-bits mono samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q10}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs + + "vrev64.16 d4, d4 \n"// reverse 2 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// (stall) extend samples to 31 bits + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// (stall) accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q14" + ); +} + +template <> +inline void ProcessL<2, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + + "vld2.16 {d4, d5}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q10}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs + + "vrev64.16 q2, q2 \n"// reverse 2 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by coef + "vqrdmulh.s32 q15, q15, q10 \n"// multiply samples by coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q4, q4, q13 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", "q4", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void Process<1, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// load 4 16-bits mono samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q9}, [%[coefsP1]:128]! \n"// load 4 32-bits coefs for interpolation + "vld1.32 {q10}, [%[coefsN1]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs for interpolation + + "vrev64.16 d4, d4 \n"// reverse 2 frames of the positive side + + "vsub.s32 q9, q9, q8 \n"// interpolate (step1) 1st set of coefs + "vsub.s32 q11, q11, q10 \n"// interpolate (step1) 2nd set of coets + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q9, q9, d2[0] \n"// interpolate (step2) 1st set of coefs + "vqrdmulh.s32 q11, q11, d2[0] \n"// interpolate (step2) 2nd set of coefs + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + + "vadd.s32 q8, q8, q9 \n"// interpolate (step3) 1st set + "vadd.s32 q10, q10, q11 \n"// interpolate (step4) 2nd set + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsP1] "+r" (coefsP1), + [coefsN0] "+r" (coefsN), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q14" + ); +} + +template <> +inline +void Process<2, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + "vld2.16 {d4, d5}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q9}, [%[coefsP1]:128]! \n"// load 4 32-bits coefs for interpolation + "vld1.32 {q10}, [%[coefsN1]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs for interpolation + + "vrev64.16 q2, q2 \n"// (reversed) 2 frames of the positive side + + "vsub.s32 q9, q9, q8 \n"// interpolate (step1) 1st set of coefs + "vsub.s32 q11, q11, q10 \n"// interpolate (step1) 2nd set of coets + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q9, q9, d2[0] \n"// interpolate (step2) 1st set of coefs + "vqrdmulh.s32 q11, q11, d2[1] \n"// interpolate (step3) 2nd set of coefs + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vadd.s32 q8, q8, q9 \n"// interpolate (step3) 1st set + "vadd.s32 q10, q10, q11 \n"// interpolate (step4) 2nd set + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q10 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q4, q4, q13 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsP1] "+r" (coefsP1), + [coefsN0] "+r" (coefsN), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", "q4", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +#endif //USE_NEON + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H*/ diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp index 35b8575..01b90a8 100644 --- a/services/audioflinger/Threads.cpp +++ b/services/audioflinger/Threads.cpp @@ -3047,15 +3047,8 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::MixerThread::prepareTrac (mMixerStatusIgnoringFastTracks == MIXER_TRACKS_READY)) { minFrames = desiredFrames; } - // It's not safe to call framesReady() for a static buffer track, so assume it's ready - size_t framesReady; - if (track->sharedBuffer() == 0) { - framesReady = track->framesReady(); - } else if (track->isStopped()) { - framesReady = 0; - } else { - framesReady = 1; - } + + size_t framesReady = track->framesReady(); if ((framesReady >= minFrames) && track->isReady() && !track->isPaused() && !track->isTerminated()) { diff --git a/services/audioflinger/Tracks.cpp b/services/audioflinger/Tracks.cpp index 53196c8..a5b9ac5 100644 --- a/services/audioflinger/Tracks.cpp +++ b/services/audioflinger/Tracks.cpp @@ -564,7 +564,7 @@ size_t AudioFlinger::PlaybackThread::Track::framesReleased() const // Don't call for fast tracks; the framesReady() could result in priority inversion bool AudioFlinger::PlaybackThread::Track::isReady() const { - if (mFillingUpStatus != FS_FILLING || isStopped() || isPausing()) { + if (mFillingUpStatus != FS_FILLING || isStopped() || isPausing() || isStopping()) { return true; } diff --git a/services/audioflinger/test-resample.cpp b/services/audioflinger/test-resample.cpp index 0d00a0f..66fcd90 100644 --- a/services/audioflinger/test-resample.cpp +++ b/services/audioflinger/test-resample.cpp @@ -33,8 +33,9 @@ using namespace android; bool gVerbose = false; static int usage(const char* name) { - fprintf(stderr,"Usage: %s [-p] [-h] [-v] [-s] [-q {dq|lq|mq|hq|vhq}] [-i input-sample-rate] " - "[-o output-sample-rate] [<input-file>] <output-file>\n", name); + fprintf(stderr,"Usage: %s [-p] [-h] [-v] [-s] [-q {dq|lq|mq|hq|vhq|dlq|dmq|dhq}]" + " [-i input-sample-rate] [-o output-sample-rate] [<input-file>]" + " <output-file>\n", name); fprintf(stderr," -p enable profiling\n"); fprintf(stderr," -h create wav file\n"); fprintf(stderr," -v verbose : log buffer provider calls\n"); @@ -45,6 +46,9 @@ static int usage(const char* name) { fprintf(stderr," mq : medium quality\n"); fprintf(stderr," hq : high quality\n"); fprintf(stderr," vhq : very high quality\n"); + fprintf(stderr," dlq : dynamic low quality\n"); + fprintf(stderr," dmq : dynamic medium quality\n"); + fprintf(stderr," dhq : dynamic high quality\n"); fprintf(stderr," -i input file sample rate (ignored if input file is specified)\n"); fprintf(stderr," -o output file sample rate\n"); return -1; @@ -53,7 +57,8 @@ static int usage(const char* name) { int main(int argc, char* argv[]) { const char* const progname = argv[0]; - bool profiling = false; + bool profileResample = false; + bool profileFilter = false; bool writeHeader = false; int channels = 1; int input_freq = 0; @@ -61,10 +66,13 @@ int main(int argc, char* argv[]) { AudioResampler::src_quality quality = AudioResampler::DEFAULT_QUALITY; int ch; - while ((ch = getopt(argc, argv, "phvsq:i:o:")) != -1) { + while ((ch = getopt(argc, argv, "pfhvsq:i:o:")) != -1) { switch (ch) { case 'p': - profiling = true; + profileResample = true; + break; + case 'f': + profileFilter = true; break; case 'h': writeHeader = true; @@ -86,6 +94,12 @@ int main(int argc, char* argv[]) { quality = AudioResampler::HIGH_QUALITY; else if (!strcmp(optarg, "vhq")) quality = AudioResampler::VERY_HIGH_QUALITY; + else if (!strcmp(optarg, "dlq")) + quality = AudioResampler::DYN_LOW_QUALITY; + else if (!strcmp(optarg, "dmq")) + quality = AudioResampler::DYN_MED_QUALITY; + else if (!strcmp(optarg, "dhq")) + quality = AudioResampler::DYN_HIGH_QUALITY; else { usage(progname); return -1; @@ -137,6 +151,8 @@ int main(int argc, char* argv[]) { channels = info.channels; input_freq = info.samplerate; } else { + // data for testing is exactly (input sampling rate/1000)/2 seconds + // so 44.1khz input is 22.05 seconds double k = 1000; // Hz / s double time = (input_freq / 2) / k; size_t input_frames = size_t(input_freq * time); @@ -148,7 +164,7 @@ int main(int argc, char* argv[]) { double y = sin(M_PI * k * t * t); int16_t yi = floor(y * 32767.0 + 0.5); for (size_t j=0 ; j<(size_t)channels ; j++) { - in[i*channels + j] = yi / (1+j); + in[i*channels + j] = yi / (1+j); // right ch. 1/2 left ch. } } } @@ -170,6 +186,7 @@ int main(int argc, char* argv[]) { } virtual status_t getNextBuffer(Buffer* buffer, int64_t pts = kInvalidPTS) { + (void)pts; // suppress warning size_t requestedFrames = buffer->frameCount; if (requestedFrames > mNumFrames - mNextFrame) { buffer->frameCount = mNumFrames - mNextFrame; @@ -202,6 +219,11 @@ int main(int argc, char* argv[]) { mNextFrame += buffer->frameCount; mUnrel -= buffer->frameCount; } + buffer->frameCount = 0; + buffer->i16 = NULL; + } + void reset() { + mNextFrame = 0; } } provider(input_vaddr, input_size, channels); @@ -212,37 +234,110 @@ int main(int argc, char* argv[]) { size_t output_size = 2 * 4 * ((int64_t) input_frames * output_freq) / input_freq; output_size &= ~7; // always stereo, 32-bits - void* output_vaddr = malloc(output_size); - - if (profiling) { + if (profileFilter) { + // Check how fast sample rate changes are that require filter changes. + // The delta sample rate changes must indicate a downsampling ratio, + // and must be larger than 10% changes. + // + // On fast devices, filters should be generated between 0.1ms - 1ms. + // (single threaded). AudioResampler* resampler = AudioResampler::create(16, channels, - output_freq, quality); - - size_t out_frames = output_size/8; - resampler->setSampleRate(input_freq); - resampler->setVolume(0x1000, 0x1000); - - memset(output_vaddr, 0, output_size); + 8000, quality); + int looplimit = 100; timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); - resampler->resample((int*) output_vaddr, out_frames, &provider); - resampler->resample((int*) output_vaddr, out_frames, &provider); - resampler->resample((int*) output_vaddr, out_frames, &provider); - resampler->resample((int*) output_vaddr, out_frames, &provider); + for (int i = 0; i < looplimit; ++i) { + resampler->setSampleRate(9000); + resampler->setSampleRate(12000); + resampler->setSampleRate(20000); + resampler->setSampleRate(30000); + } clock_gettime(CLOCK_MONOTONIC, &end); int64_t start_ns = start.tv_sec * 1000000000LL + start.tv_nsec; int64_t end_ns = end.tv_sec * 1000000000LL + end.tv_nsec; - int64_t time = (end_ns - start_ns)/4; - printf("%f Mspl/s\n", out_frames/(time/1e9)/1e6); + int64_t time = end_ns - start_ns; + printf("%.2f sample rate changes with filter calculation/sec\n", + looplimit * 4 / (time / 1e9)); + // Check how fast sample rate changes are without filter changes. + // This should be very fast, probably 0.1us - 1us per sample rate + // change. + resampler->setSampleRate(1000); + looplimit = 1000; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < looplimit; ++i) { + resampler->setSampleRate(1000+i); + } + clock_gettime(CLOCK_MONOTONIC, &end); + start_ns = start.tv_sec * 1000000000LL + start.tv_nsec; + end_ns = end.tv_sec * 1000000000LL + end.tv_nsec; + time = end_ns - start_ns; + printf("%.2f sample rate changes without filter calculation/sec\n", + looplimit / (time / 1e9)); + resampler->reset(); delete resampler; } + void* output_vaddr = malloc(output_size); AudioResampler* resampler = AudioResampler::create(16, channels, output_freq, quality); size_t out_frames = output_size/8; + + /* set volume precision to 12 bits, so the volume scale is 1<<12. + * This means the "integer" part fits in the Q19.12 precision + * representation of output int32_t. + * + * Generally 0 < volumePrecision <= 14 (due to the limits of + * int16_t values for Volume). volumePrecision cannot be 0 due + * to rounding and shifts. + */ + const int volumePrecision = 12; // in bits + resampler->setSampleRate(input_freq); - resampler->setVolume(0x1000, 0x1000); + resampler->setVolume(1 << volumePrecision, 1 << volumePrecision); + + if (profileResample) { + /* + * For profiling on mobile devices, upon experimentation + * it is better to run a few trials with a shorter loop limit, + * and take the minimum time. + * + * Long tests can cause CPU temperature to build up and thermal throttling + * to reduce CPU frequency. + * + * For frequency checks (index=0, or 1, etc.): + * "cat /sys/devices/system/cpu/cpu${index}/cpufreq/scaling_*_freq" + * + * For temperature checks (index=0, or 1, etc.): + * "cat /sys/class/thermal/thermal_zone${index}/temp" + * + * Another way to avoid thermal throttling is to fix the CPU frequency + * at a lower level which prevents excessive temperatures. + */ + const int trials = 4; + const int looplimit = 4; + timespec start, end; + int64_t time; + + for (int n = 0; n < trials; ++n) { + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < looplimit; ++i) { + resampler->resample((int*) output_vaddr, out_frames, &provider); + provider.reset(); // during benchmarking reset only the provider + } + clock_gettime(CLOCK_MONOTONIC, &end); + int64_t start_ns = start.tv_sec * 1000000000LL + start.tv_nsec; + int64_t end_ns = end.tv_sec * 1000000000LL + end.tv_nsec; + int64_t diff_ns = end_ns - start_ns; + if (n == 0 || diff_ns < time) { + time = diff_ns; // save the best out of our trials. + } + } + // Mfrms/s is "Millions of output frames per second". + printf("quality: %d channels: %d msec: %lld Mfrms/s: %.2lf\n", + quality, channels, time/1000000, out_frames * looplimit / (time / 1e9) / 1e6); + resampler->reset(); + } memset(output_vaddr, 0, output_size); if (gVerbose) { @@ -256,15 +351,31 @@ int main(int argc, char* argv[]) { if (gVerbose) { printf("reset() complete\n"); } + delete resampler; + resampler = NULL; - // down-mix (we just truncate and keep the left channel) + // mono takes left channel only + // stereo right channel is half amplitude of stereo left channel (due to input creation) int32_t* out = (int32_t*) output_vaddr; int16_t* convert = (int16_t*) malloc(out_frames * channels * sizeof(int16_t)); + + // round to half towards zero and saturate at int16 (non-dithered) + const int roundVal = (1<<(volumePrecision-1)) - 1; // volumePrecision > 0 + for (size_t i = 0; i < out_frames; i++) { - for (int j=0 ; j<channels ; j++) { - int32_t s = out[i * 2 + j] >> 12; - if (s > 32767) s = 32767; - else if (s < -32768) s = -32768; + for (int j = 0; j < channels; j++) { + int32_t s = out[i * 2 + j] + roundVal; // add offset here + if (s < 0) { + s = (s + 1) >> volumePrecision; // round to 0 + if (s < -32768) { + s = -32768; + } + } else { + s = s >> volumePrecision; + if (s > 32767) { + s = 32767; + } + } convert[i * channels + j] = int16_t(s); } } diff --git a/services/camera/libcameraservice/api2/CameraDeviceClient.cpp b/services/camera/libcameraservice/api2/CameraDeviceClient.cpp index 1cdf8dc..187220e 100644 --- a/services/camera/libcameraservice/api2/CameraDeviceClient.cpp +++ b/services/camera/libcameraservice/api2/CameraDeviceClient.cpp @@ -635,26 +635,56 @@ status_t CameraDeviceClient::getRotationTransformLocked(int32_t* transform) { return INVALID_OPERATION; } + camera_metadata_ro_entry_t entryFacing = staticInfo.find(ANDROID_LENS_FACING); + if (entry.count == 0) { + ALOGE("%s: Camera %d: Can't find android.lens.facing in " + "static metadata!", __FUNCTION__, mCameraId); + return INVALID_OPERATION; + } + int32_t& flags = *transform; + bool mirror = (entryFacing.data.u8[0] == ANDROID_LENS_FACING_FRONT); int orientation = entry.data.i32[0]; - switch (orientation) { - case 0: - flags = 0; - break; - case 90: - flags = NATIVE_WINDOW_TRANSFORM_ROT_90; - break; - case 180: - flags = NATIVE_WINDOW_TRANSFORM_ROT_180; - break; - case 270: - flags = NATIVE_WINDOW_TRANSFORM_ROT_270; - break; - default: - ALOGE("%s: Invalid HAL android.sensor.orientation value: %d", - __FUNCTION__, orientation); - return INVALID_OPERATION; + if (!mirror) { + switch (orientation) { + case 0: + flags = 0; + break; + case 90: + flags = NATIVE_WINDOW_TRANSFORM_ROT_90; + break; + case 180: + flags = NATIVE_WINDOW_TRANSFORM_ROT_180; + break; + case 270: + flags = NATIVE_WINDOW_TRANSFORM_ROT_270; + break; + default: + ALOGE("%s: Invalid HAL android.sensor.orientation value: %d", + __FUNCTION__, orientation); + return INVALID_OPERATION; + } + } else { + switch (orientation) { + case 0: + flags = HAL_TRANSFORM_FLIP_H; + break; + case 90: + flags = HAL_TRANSFORM_FLIP_H | HAL_TRANSFORM_ROT_90; + break; + case 180: + flags = HAL_TRANSFORM_FLIP_V; + break; + case 270: + flags = HAL_TRANSFORM_FLIP_V | HAL_TRANSFORM_ROT_90; + break; + default: + ALOGE("%s: Invalid HAL android.sensor.orientation value: %d", + __FUNCTION__, orientation); + return INVALID_OPERATION; + } + } /** diff --git a/tools/resampler_tools/fir.cpp b/tools/resampler_tools/fir.cpp index cc3d509..3d6a74d 100644 --- a/tools/resampler_tools/fir.cpp +++ b/tools/resampler_tools/fir.cpp @@ -20,15 +20,25 @@ #include <stdlib.h> #include <string.h> -static double sinc(double x) { +static inline double sinc(double x) { if (fabs(x) == 0.0f) return 1.0f; return sin(x) / x; } -static double sqr(double x) { +static inline double sqr(double x) { return x*x; } +static inline int64_t toint(double x, int64_t maxval) { + int64_t v; + + v = static_cast<int64_t>(floor(x * maxval + 0.5)); + if (v >= maxval) { + return maxval - 1; // error! + } + return v; +} + static double I0(double x) { // from the Numerical Recipes in C p. 237 double ax,ans,y; @@ -54,11 +64,12 @@ static double kaiser(int k, int N, double beta) { return I0(beta * sqrt(1.0 - sqr((2.0*k)/N - 1.0))) / I0(beta); } - static void usage(char* name) { fprintf(stderr, - "usage: %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings] [-f {float|fixed}] [-b beta] [-v dBFS] [-l lerp]\n" - " %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings] [-f {float|fixed}] [-b beta] [-v dBFS] -p M/N\n" + "usage: %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings]" + " [-f {float|fixed|fixed16}] [-b beta] [-v dBFS] [-l lerp]\n" + " %s [-h] [-d] [-s sample_rate] [-c cut-off_frequency] [-n half_zero_crossings]" + " [-f {float|fixed|fixed16}] [-b beta] [-v dBFS] -p M/N\n" " -h this help message\n" " -d debug, print comma-separated coefficient table\n" " -p generate poly-phase filter coefficients, with sample increment M/N\n" @@ -66,6 +77,7 @@ static void usage(char* name) { " -c cut-off frequency (20478)\n" " -n number of zero-crossings on one side (8)\n" " -l number of lerping bits (4)\n" + " -m number of polyphases (related to -l, default 16)\n" " -f output format, can be fixed-point or floating-point (fixed)\n" " -b kaiser window parameter beta (7.865 [-80dB])\n" " -v attenuation in dBFS (0)\n", @@ -77,8 +89,7 @@ static void usage(char* name) { int main(int argc, char** argv) { // nc is the number of bits to store the coefficients - const int nc = 32; - + int nc = 32; bool polyphase = false; unsigned int polyM = 160; unsigned int polyN = 147; @@ -88,7 +99,6 @@ int main(int argc, char** argv) double atten = 1; int format = 0; - // in order to keep the errors associated with the linear // interpolation of the coefficients below the quantization error // we must satisfy: @@ -104,7 +114,6 @@ int main(int argc, char** argv) // Smith, J.O. Digital Audio Resampling Home Page // https://ccrma.stanford.edu/~jos/resample/, 2011-03-29 // - int nz = 4; // | 0.1102*(A - 8.7) A > 50 // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50 @@ -123,7 +132,6 @@ int main(int argc, char** argv) // 100 dB 10.056 double beta = 7.865; - // 2*nzc = (A - 8) / (2.285 * dw) // with dw the transition width = 2*pi*dF/Fs // @@ -148,8 +156,9 @@ int main(int argc, char** argv) // nzc = 20 // + int M = 1 << 4; // number of phases for interpolation int ch; - while ((ch = getopt(argc, argv, ":hds:c:n:f:l:b:p:v:")) != -1) { + while ((ch = getopt(argc, argv, ":hds:c:n:f:l:m:b:p:v:z:")) != -1) { switch (ch) { case 'd': debug = true; @@ -169,13 +178,26 @@ int main(int argc, char** argv) case 'n': nzc = atoi(optarg); break; + case 'm': + M = atoi(optarg); + break; case 'l': - nz = atoi(optarg); + M = 1 << atoi(optarg); break; case 'f': - if (!strcmp(optarg,"fixed")) format = 0; - else if (!strcmp(optarg,"float")) format = 1; - else usage(argv[0]); + if (!strcmp(optarg, "fixed")) { + format = 0; + } + else if (!strcmp(optarg, "fixed16")) { + format = 0; + nc = 16; + } + else if (!strcmp(optarg, "float")) { + format = 1; + } + else { + usage(argv[0]); + } break; case 'b': beta = atof(optarg); @@ -193,11 +215,14 @@ int main(int argc, char** argv) // cut off frequency ratio Fc/Fs double Fcr = Fc / Fs; - // total number of coefficients (one side) - const int M = (1 << nz); + const int N = M * nzc; + // lerp (which is most useful if M is a power of 2) + + int nz = 0; // recalculate nz as the bits needed to represent M + for (int i = M-1 ; i; i>>=1, nz++); // generate the right half of the filter if (!debug) { printf("// cmd-line: "); @@ -207,7 +232,7 @@ int main(int argc, char** argv) printf("\n"); if (!polyphase) { printf("const int32_t RESAMPLE_FIR_SIZE = %d;\n", N); - printf("const int32_t RESAMPLE_FIR_LERP_INT_BITS = %d;\n", nz); + printf("const int32_t RESAMPLE_FIR_INT_PHASES = %d;\n", M); printf("const int32_t RESAMPLE_FIR_NUM_COEF = %d;\n", nzc); } else { printf("const int32_t RESAMPLE_FIR_SIZE = %d;\n", 2*nzc*polyN); @@ -224,7 +249,7 @@ int main(int argc, char** argv) for (int i=0 ; i<=M ; i++) { // an extra set of coefs for interpolation for (int j=0 ; j<nzc ; j++) { int ix = j*M + i; - double x = (2.0 * M_PI * ix * Fcr) / (1 << nz); + double x = (2.0 * M_PI * ix * Fcr) / M; double y = kaiser(ix+N, 2*N, beta) * sinc(x) * 2.0 * Fcr; y *= atten; @@ -232,11 +257,13 @@ int main(int argc, char** argv) if (j == 0) printf("\n "); } - if (!format) { - int64_t yi = floor(y * ((1ULL<<(nc-1))) + 0.5); - if (yi >= (1LL<<(nc-1))) yi = (1LL<<(nc-1))-1; - printf("0x%08x, ", int32_t(yi)); + int64_t yi = toint(y, 1ULL<<(nc-1)); + if (nc > 16) { + printf("0x%08x, ", int32_t(yi)); + } else { + printf("0x%04x, ", int32_t(yi)&0xffff); + } } else { printf("%.9g%s ", y, debug ? "," : "f,"); } @@ -254,9 +281,12 @@ int main(int argc, char** argv) double y = kaiser(i+N, 2*N, beta) * sinc(x) * 2.0 * Fcr;; y *= atten; if (!format) { - int64_t yi = floor(y * ((1ULL<<(nc-1))) + 0.5); - if (yi >= (1LL<<(nc-1))) yi = (1LL<<(nc-1))-1; - printf("0x%08x", int32_t(yi)); + int64_t yi = toint(y, 1ULL<<(nc-1)); + if (nc > 16) { + printf("0x%08x, ", int32_t(yi)); + } else { + printf("0x%04x, ", int32_t(yi)&0xffff); + } } else { printf("%.9g%s", y, debug ? "" : "f"); } @@ -277,5 +307,3 @@ int main(int argc, char** argv) } // http://www.csee.umbc.edu/help/sound/AFsp-V2R1/html/audio/ResampAudio.html - - |