diff options
Diffstat (limited to 'services/audioflinger')
25 files changed, 4504 insertions, 937 deletions
diff --git a/services/audioflinger/Android.mk b/services/audioflinger/Android.mk index 54377f1..4524d3c 100644 --- a/services/audioflinger/Android.mk +++ b/services/audioflinger/Android.mk @@ -23,7 +23,8 @@ LOCAL_SRC_FILES:= \ AudioPolicyService.cpp \ ServiceUtilities.cpp \ AudioResamplerCubic.cpp.arm \ - AudioResamplerSinc.cpp.arm + AudioResamplerSinc.cpp.arm \ + AudioResamplerDyn.cpp.arm LOCAL_SRC_FILES += StateQueue.cpp @@ -74,12 +75,20 @@ include $(BUILD_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_SRC_FILES:= \ - test-resample.cpp \ + test-resample.cpp \ AudioResampler.cpp.arm \ - AudioResamplerCubic.cpp.arm \ - AudioResamplerSinc.cpp.arm + AudioResamplerCubic.cpp.arm \ + AudioResamplerSinc.cpp.arm \ + AudioResamplerDyn.cpp.arm + +LOCAL_C_INCLUDES := \ + $(call include-path-for, audio-utils) + +LOCAL_STATIC_LIBRARIES := \ + libsndfile LOCAL_SHARED_LIBRARIES := \ + libaudioutils \ libdl \ libcutils \ libutils \ diff --git a/services/audioflinger/AudioFlinger.cpp b/services/audioflinger/AudioFlinger.cpp index e9c38e3..21bd2f4 100644 --- a/services/audioflinger/AudioFlinger.cpp +++ b/services/audioflinger/AudioFlinger.cpp @@ -104,6 +104,27 @@ static const nsecs_t kMinGlobalEffectEnabletimeNs = seconds(7200); // ---------------------------------------------------------------------------- +const char *formatToString(audio_format_t format) { + switch(format) { + case AUDIO_FORMAT_PCM_SUB_8_BIT: return "pcm8"; + case AUDIO_FORMAT_PCM_SUB_16_BIT: return "pcm16"; + case AUDIO_FORMAT_PCM_SUB_32_BIT: return "pcm32"; + case AUDIO_FORMAT_PCM_SUB_8_24_BIT: return "pcm8.24"; + case AUDIO_FORMAT_PCM_SUB_24_BIT_PACKED: return "pcm24"; + case AUDIO_FORMAT_PCM_SUB_FLOAT: return "pcmfloat"; + case AUDIO_FORMAT_MP3: return "mp3"; + case AUDIO_FORMAT_AMR_NB: return "amr-nb"; + case AUDIO_FORMAT_AMR_WB: return "amr-wb"; + case AUDIO_FORMAT_AAC: return "aac"; + case AUDIO_FORMAT_HE_AAC_V1: return "he-aac-v1"; + case AUDIO_FORMAT_HE_AAC_V2: return "he-aac-v2"; + case AUDIO_FORMAT_VORBIS: return "vorbis"; + default: + break; + } + return "unknown"; +} + static int load_audio_interface(const char *if_name, audio_hw_device_t **dev) { const hw_module_t *mod; @@ -162,12 +183,15 @@ AudioFlinger::AudioFlinger() (void) property_get("af.tee", value, "0"); teeEnabled = atoi(value); } - if (teeEnabled & 1) + if (teeEnabled & 1) { mTeeSinkInputEnabled = true; - if (teeEnabled & 2) + } + if (teeEnabled & 2) { mTeeSinkOutputEnabled = true; - if (teeEnabled & 4) + } + if (teeEnabled & 4) { mTeeSinkTrackEnabled = true; + } #endif } @@ -210,6 +234,18 @@ AudioFlinger::~AudioFlinger() audio_hw_device_close(mAudioHwDevs.valueAt(i)->hwDevice()); delete mAudioHwDevs.valueAt(i); } + + // Tell media.log service about any old writers that still need to be unregistered + sp<IBinder> binder = defaultServiceManager()->getService(String16("media.log")); + if (binder != 0) { + sp<IMediaLogService> mediaLogService(interface_cast<IMediaLogService>(binder)); + for (size_t count = mUnregisteredWriters.size(); count > 0; count--) { + sp<IMemory> iMemory(mUnregisteredWriters.top()->getIMemory()); + mUnregisteredWriters.pop(); + mediaLogService->unregisterWriter(iMemory); + } + } + } static const char * const audio_interfaces[] = { @@ -249,7 +285,7 @@ AudioFlinger::AudioHwDevice* AudioFlinger::findSuitableHwDev_l( return NULL; } -void AudioFlinger::dumpClients(int fd, const Vector<String16>& args) +void AudioFlinger::dumpClients(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; @@ -271,17 +307,17 @@ void AudioFlinger::dumpClients(int fd, const Vector<String16>& args) } result.append("Global session refs:\n"); - result.append(" session pid count\n"); + result.append(" session pid count\n"); for (size_t i = 0; i < mAudioSessionRefs.size(); i++) { AudioSessionRef *r = mAudioSessionRefs[i]; - snprintf(buffer, SIZE, " %7d %3d %3d\n", r->mSessionid, r->mPid, r->mCnt); + snprintf(buffer, SIZE, " %7d %5d %5d\n", r->mSessionid, r->mPid, r->mCnt); result.append(buffer); } write(fd, result.string(), result.size()); } -void AudioFlinger::dumpInternals(int fd, const Vector<String16>& args) +void AudioFlinger::dumpInternals(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; @@ -296,7 +332,7 @@ void AudioFlinger::dumpInternals(int fd, const Vector<String16>& args) write(fd, result.string(), result.size()); } -void AudioFlinger::dumpPermissionDenial(int fd, const Vector<String16>& args) +void AudioFlinger::dumpPermissionDenial(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; @@ -403,16 +439,44 @@ sp<AudioFlinger::Client> AudioFlinger::registerPid_l(pid_t pid) sp<NBLog::Writer> AudioFlinger::newWriter_l(size_t size, const char *name) { + // If there is no memory allocated for logs, return a dummy writer that does nothing if (mLogMemoryDealer == 0) { return new NBLog::Writer(); } - sp<IMemory> shared = mLogMemoryDealer->allocate(NBLog::Timeline::sharedSize(size)); - sp<NBLog::Writer> writer = new NBLog::Writer(size, shared); sp<IBinder> binder = defaultServiceManager()->getService(String16("media.log")); - if (binder != 0) { - interface_cast<IMediaLogService>(binder)->registerWriter(shared, size, name); + // Similarly if we can't contact the media.log service, also return a dummy writer + if (binder == 0) { + return new NBLog::Writer(); } - return writer; + sp<IMediaLogService> mediaLogService(interface_cast<IMediaLogService>(binder)); + sp<IMemory> shared = mLogMemoryDealer->allocate(NBLog::Timeline::sharedSize(size)); + // If allocation fails, consult the vector of previously unregistered writers + // and garbage-collect one or more them until an allocation succeeds + if (shared == 0) { + Mutex::Autolock _l(mUnregisteredWritersLock); + for (size_t count = mUnregisteredWriters.size(); count > 0; count--) { + { + // Pick the oldest stale writer to garbage-collect + sp<IMemory> iMemory(mUnregisteredWriters[0]->getIMemory()); + mUnregisteredWriters.removeAt(0); + mediaLogService->unregisterWriter(iMemory); + // Now the media.log remote reference to IMemory is gone. When our last local + // reference to IMemory also drops to zero at end of this block, + // the IMemory destructor will deallocate the region from mLogMemoryDealer. + } + // Re-attempt the allocation + shared = mLogMemoryDealer->allocate(NBLog::Timeline::sharedSize(size)); + if (shared != 0) { + goto success; + } + } + // Even after garbage-collecting all old writers, there is still not enough memory, + // so return a dummy writer + return new NBLog::Writer(); + } +success: + mediaLogService->registerWriter(shared, size, name); + return new NBLog::Writer(size, shared); } void AudioFlinger::unregisterWriter(const sp<NBLog::Writer>& writer) @@ -424,13 +488,10 @@ void AudioFlinger::unregisterWriter(const sp<NBLog::Writer>& writer) if (iMemory == 0) { return; } - sp<IBinder> binder = defaultServiceManager()->getService(String16("media.log")); - if (binder != 0) { - interface_cast<IMediaLogService>(binder)->unregisterWriter(iMemory); - // Now the media.log remote reference to IMemory is gone. - // When our last local reference to IMemory also drops to zero, - // the IMemory destructor will deallocate the region from mMemoryDealer. - } + // Rather than removing the writer immediately, append it to a queue of old writers to + // be garbage-collected later. This allows us to continue to view old logs for a while. + Mutex::Autolock _l(mUnregisteredWritersLock); + mUnregisteredWriters.push(writer); } // IAudioFlinger interface @@ -441,7 +502,7 @@ sp<IAudioTrack> AudioFlinger::createTrack( uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *frameCount, IAudioFlinger::track_flags_t *flags, const sp<IMemory>& sharedBuffer, audio_io_handle_t output, @@ -468,7 +529,13 @@ sp<IAudioTrack> AudioFlinger::createTrack( // client is responsible for conversion of 8-bit PCM to 16-bit PCM, // and we don't yet support 8.24 or 32-bit PCM if (audio_is_linear_pcm(format) && format != AUDIO_FORMAT_PCM_16_BIT) { - ALOGE("createTrack() invalid format %d", format); + ALOGE("createTrack() invalid format %#x", format); + lStatus = BAD_VALUE; + goto Exit; + } + + if (sharedBuffer != 0 && sharedBuffer->pointer() == NULL) { + ALOGE("createTrack() sharedBuffer is non-0 but has NULL pointer()"); lStatus = BAD_VALUE; goto Exit; } @@ -488,7 +555,7 @@ sp<IAudioTrack> AudioFlinger::createTrack( client = registerPid_l(pid); ALOGV("createTrack() sessionId: %d", (sessionId == NULL) ? -2 : *sessionId); - if (sessionId != NULL && *sessionId != AUDIO_SESSION_OUTPUT_MIX) { + if (sessionId != NULL && *sessionId != AUDIO_SESSION_ALLOCATE) { // check if an effect chain with the same session ID is present on another // output thread and move it here. for (size_t i = 0; i < mPlaybackThreads.size(); i++) { @@ -513,10 +580,13 @@ sp<IAudioTrack> AudioFlinger::createTrack( track = thread->createTrack_l(client, streamType, sampleRate, format, channelMask, frameCount, sharedBuffer, lSessionId, flags, tid, clientUid, &lStatus); + LOG_ALWAYS_FATAL_IF((lStatus == NO_ERROR) && (track == 0)); + // we don't abort yet if lStatus != NO_ERROR; there is still work to be done regardless // move effect chain to this output thread if an effect on same session was waiting // for a track to be created if (lStatus == NO_ERROR && effectThread != NULL) { + // no risk of deadlock because AudioFlinger::mLock is held Mutex::Autolock _dl(thread->mLock); Mutex::Autolock _sl(effectThread->mLock); moveEffectChain_l(lSessionId, effectThread, thread, true); @@ -536,7 +606,9 @@ sp<IAudioTrack> AudioFlinger::createTrack( } } } + } + if (lStatus == NO_ERROR) { // s for server's pid, n for normal mixer name, f for fast index name = String8::format("s:%d;n:%d;f:%d", getpid_cached, track->name() - AudioMixer::TRACK0, @@ -550,9 +622,7 @@ sp<IAudioTrack> AudioFlinger::createTrack( } Exit: - if (status != NULL) { - *status = lStatus; - } + *status = lStatus; return trackHandle; } @@ -1210,7 +1280,7 @@ AudioFlinger::NotificationClient::~NotificationClient() { } -void AudioFlinger::NotificationClient::binderDied(const wp<IBinder>& who) +void AudioFlinger::NotificationClient::binderDied(const wp<IBinder>& who __unused) { sp<NotificationClient> keep(this); mAudioFlinger->removeNotificationClient(mPid); @@ -1228,7 +1298,7 @@ sp<IAudioRecord> AudioFlinger::openRecord( uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *frameCount, IAudioFlinger::track_flags_t *flags, pid_t tid, int *sessionId, @@ -1250,7 +1320,7 @@ sp<IAudioRecord> AudioFlinger::openRecord( } if (format != AUDIO_FORMAT_PCM_16_BIT) { - ALOGE("openRecord() invalid format %d", format); + ALOGE("openRecord() invalid format %#x", format); lStatus = BAD_VALUE; goto Exit; } @@ -1276,7 +1346,7 @@ sp<IAudioRecord> AudioFlinger::openRecord( client = registerPid_l(pid); // If no audio session id is provided, create one here - if (sessionId != NULL && *sessionId != AUDIO_SESSION_OUTPUT_MIX) { + if (sessionId != NULL && *sessionId != AUDIO_SESSION_ALLOCATE) { lSessionId = *sessionId; } else { lSessionId = nextUniqueId(); @@ -1291,8 +1361,9 @@ sp<IAudioRecord> AudioFlinger::openRecord( frameCount, lSessionId, IPCThreadState::self()->getCallingUid(), flags, tid, &lStatus); - LOG_ALWAYS_FATAL_IF((recordTrack != 0) != (lStatus == NO_ERROR)); + LOG_ALWAYS_FATAL_IF((lStatus == NO_ERROR) && (recordTrack == 0)); } + if (lStatus != NO_ERROR) { // remove local strong reference to Client before deleting the RecordTrack so that the // Client destructor is called by the TrackBase destructor with mLock held @@ -1301,14 +1372,11 @@ sp<IAudioRecord> AudioFlinger::openRecord( goto Exit; } - // return to handle to client + // return handle to client recordHandle = new RecordHandle(recordTrack); - lStatus = NO_ERROR; Exit: - if (status) { - *status = lStatus; - } + *status = lStatus; return recordHandle; } @@ -1449,18 +1517,15 @@ audio_io_handle_t AudioFlinger::openOutput(audio_module_handle_t module, audio_output_flags_t flags, const audio_offload_info_t *offloadInfo) { - PlaybackThread *thread = NULL; struct audio_config config; + memset(&config, 0, sizeof(config)); config.sample_rate = (pSamplingRate != NULL) ? *pSamplingRate : 0; config.channel_mask = (pChannelMask != NULL) ? *pChannelMask : 0; config.format = (pFormat != NULL) ? *pFormat : AUDIO_FORMAT_DEFAULT; - if (offloadInfo) { + if (offloadInfo != NULL) { config.offload_info = *offloadInfo; } - audio_stream_out_t *outStream = NULL; - AudioHwDevice *outHwDev; - ALOGV("openOutput(), module %d Device %x, SamplingRate %d, Format %#08x, Channels %x, flags %x", module, (pDevices != NULL) ? *pDevices : 0, @@ -1469,7 +1534,7 @@ audio_io_handle_t AudioFlinger::openOutput(audio_module_handle_t module, config.channel_mask, flags); ALOGV("openOutput(), offloadInfo %p version 0x%04x", - offloadInfo, offloadInfo == NULL ? -1 : offloadInfo->version ); + offloadInfo, offloadInfo == NULL ? -1 : offloadInfo->version); if (pDevices == NULL || *pDevices == 0) { return 0; @@ -1477,15 +1542,17 @@ audio_io_handle_t AudioFlinger::openOutput(audio_module_handle_t module, Mutex::Autolock _l(mLock); - outHwDev = findSuitableHwDev_l(module, *pDevices); - if (outHwDev == NULL) + AudioHwDevice *outHwDev = findSuitableHwDev_l(module, *pDevices); + if (outHwDev == NULL) { return 0; + } audio_hw_device_t *hwDevHal = outHwDev->hwDevice(); audio_io_handle_t id = nextUniqueId(); mHardwareStatus = AUDIO_HW_OUTPUT_OPEN; + audio_stream_out_t *outStream = NULL; status_t status = hwDevHal->open_output_stream(hwDevHal, id, *pDevices, @@ -1505,6 +1572,7 @@ audio_io_handle_t AudioFlinger::openOutput(audio_module_handle_t module, if (status == NO_ERROR && outStream != NULL) { AudioStreamOut *output = new AudioStreamOut(outHwDev, outStream, flags); + PlaybackThread *thread; if (flags & AUDIO_OUTPUT_FLAG_COMPRESS_OFFLOAD) { thread = new OffloadThread(this, output, id, *pDevices); ALOGV("openOutput() created offload output: ID %d thread %p", id, thread); @@ -1672,18 +1740,15 @@ audio_io_handle_t AudioFlinger::openInput(audio_module_handle_t module, audio_format_t *pFormat, audio_channel_mask_t *pChannelMask) { - status_t status; - RecordThread *thread = NULL; struct audio_config config; + memset(&config, 0, sizeof(config)); config.sample_rate = (pSamplingRate != NULL) ? *pSamplingRate : 0; config.channel_mask = (pChannelMask != NULL) ? *pChannelMask : 0; config.format = (pFormat != NULL) ? *pFormat : AUDIO_FORMAT_DEFAULT; uint32_t reqSamplingRate = config.sample_rate; audio_format_t reqFormat = config.format; - audio_channel_mask_t reqChannels = config.channel_mask; - audio_stream_in_t *inStream = NULL; - AudioHwDevice *inHwDev; + audio_channel_mask_t reqChannelMask = config.channel_mask; if (pDevices == NULL || *pDevices == 0) { return 0; @@ -1691,16 +1756,18 @@ audio_io_handle_t AudioFlinger::openInput(audio_module_handle_t module, Mutex::Autolock _l(mLock); - inHwDev = findSuitableHwDev_l(module, *pDevices); - if (inHwDev == NULL) + AudioHwDevice *inHwDev = findSuitableHwDev_l(module, *pDevices); + if (inHwDev == NULL) { return 0; + } audio_hw_device_t *inHwHal = inHwDev->hwDevice(); audio_io_handle_t id = nextUniqueId(); - status = inHwHal->open_input_stream(inHwHal, id, *pDevices, &config, + audio_stream_in_t *inStream = NULL; + status_t status = inHwHal->open_input_stream(inHwHal, id, *pDevices, &config, &inStream); - ALOGV("openInput() openInputStream returned input %p, SamplingRate %d, Format %d, Channels %x, " + ALOGV("openInput() openInputStream returned input %p, SamplingRate %d, Format %#x, Channels %x, " "status %d", inStream, config.sample_rate, @@ -1714,10 +1781,12 @@ audio_io_handle_t AudioFlinger::openInput(audio_module_handle_t module, if (status == BAD_VALUE && reqFormat == config.format && config.format == AUDIO_FORMAT_PCM_16_BIT && (config.sample_rate <= 2 * reqSamplingRate) && - (popcount(config.channel_mask) <= FCC_2) && (popcount(reqChannels) <= FCC_2)) { + (popcount(config.channel_mask) <= FCC_2) && (popcount(reqChannelMask) <= FCC_2)) { + // FIXME describe the change proposed by HAL (save old values so we can log them here) ALOGV("openInput() reopening with proposed sampling rate and channel mask"); inStream = NULL; status = inHwHal->open_input_stream(inHwHal, id, *pDevices, &config, &inStream); + // FIXME log this new status; HAL should not propose any further changes } if (status == NO_ERROR && inStream != NULL) { @@ -1735,7 +1804,7 @@ audio_io_handle_t AudioFlinger::openInput(audio_module_handle_t module, popcount(inStream->common.get_channels(&inStream->common))); if (!mTeeSinkInputEnabled) { kind = TEE_SINK_NO; - } else if (format == Format_Invalid) { + } else if (!Format_isValid(format)) { kind = TEE_SINK_NO; } else if (mRecordTeeSink == 0) { kind = TEE_SINK_NEW; @@ -1776,10 +1845,10 @@ audio_io_handle_t AudioFlinger::openInput(audio_module_handle_t module, // Start record thread // RecordThread requires both input and output device indication to forward to audio // pre processing modules - thread = new RecordThread(this, + RecordThread *thread = new RecordThread(this, input, reqSamplingRate, - reqChannels, + reqChannelMask, id, primaryOutputDevice_l(), *pDevices @@ -1796,7 +1865,7 @@ audio_io_handle_t AudioFlinger::openInput(audio_module_handle_t module, *pFormat = config.format; } if (pChannelMask != NULL) { - *pChannelMask = reqChannels; + *pChannelMask = reqChannelMask; } // notify client processes of the new input creation @@ -1954,7 +2023,7 @@ void AudioFlinger::purgeStaleEffects_l() { } } if (!found) { - Mutex::Autolock _l (t->mLock); + Mutex::Autolock _l(t->mLock); // remove all effects from the chain while (ec->mEffects.size()) { sp<EffectModule> effect = ec->mEffects[0]; @@ -2249,9 +2318,7 @@ sp<IEffect> AudioFlinger::createEffect( } Exit: - if (status != NULL) { - *status = lStatus; - } + *status = lStatus; return handle; } diff --git a/services/audioflinger/AudioFlinger.h b/services/audioflinger/AudioFlinger.h index 7320144..459d2ec 100644 --- a/services/audioflinger/AudioFlinger.h +++ b/services/audioflinger/AudioFlinger.h @@ -60,8 +60,8 @@ namespace android { -class audio_track_cblk_t; -class effect_param_cblk_t; +struct audio_track_cblk_t; +struct effect_param_cblk_t; class AudioMixer; class AudioBuffer; class AudioResampler; @@ -102,7 +102,7 @@ public: uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *pFrameCount, IAudioFlinger::track_flags_t *flags, const sp<IMemory>& sharedBuffer, audio_io_handle_t output, @@ -110,18 +110,18 @@ public: int *sessionId, String8& name, int clientUid, - status_t *status); + status_t *status /*non-NULL*/); virtual sp<IAudioRecord> openRecord( audio_io_handle_t input, uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *pFrameCount, IAudioFlinger::track_flags_t *flags, pid_t tid, int *sessionId, - status_t *status); + status_t *status /*non-NULL*/); virtual uint32_t sampleRate(audio_io_handle_t output) const; virtual int channelCount(audio_io_handle_t output) const; @@ -210,7 +210,7 @@ public: int32_t priority, audio_io_handle_t io, int sessionId, - status_t *status, + status_t *status /*non-NULL*/, int *id, int *enabled); @@ -235,8 +235,12 @@ public: sp<NBLog::Writer> newWriter_l(size_t size, const char *name); void unregisterWriter(const sp<NBLog::Writer>& writer); private: - static const size_t kLogMemorySize = 10 * 1024; + static const size_t kLogMemorySize = 40 * 1024; sp<MemoryDealer> mLogMemoryDealer; // == 0 when NBLog is disabled + // When a log writer is unregistered, it is done lazily so that media.log can continue to see it + // for as long as possible. The memory is only freed when it is needed for another log writer. + Vector< sp<NBLog::Writer> > mUnregisteredWriters; + Mutex mUnregisteredWritersLock; public: class SyncEvent; @@ -499,7 +503,7 @@ private: private: const char * const mModuleName; audio_hw_device_t * const mHwDevice; - Flags mFlags; + const Flags mFlags; }; // AudioStreamOut and AudioStreamIn are immutable, so their fields are const. @@ -509,7 +513,7 @@ private: struct AudioStreamOut { AudioHwDevice* const audioHwDev; audio_stream_out_t* const stream; - audio_output_flags_t flags; + const audio_output_flags_t flags; audio_hw_device_t* hwDev() const { return audioHwDev->hwDevice(); } @@ -651,6 +655,8 @@ private: #undef INCLUDING_FROM_AUDIOFLINGER_H +const char *formatToString(audio_format_t format); + // ---------------------------------------------------------------------------- }; // namespace android diff --git a/services/audioflinger/AudioMixer.cpp b/services/audioflinger/AudioMixer.cpp index f92421e..67d83b1 100644 --- a/services/audioflinger/AudioMixer.cpp +++ b/services/audioflinger/AudioMixer.cpp @@ -58,7 +58,7 @@ AudioMixer::DownmixerBufferProvider::~DownmixerBufferProvider() status_t AudioMixer::DownmixerBufferProvider::getNextBuffer(AudioBufferProvider::Buffer *pBuffer, int64_t pts) { //ALOGV("DownmixerBufferProvider::getNextBuffer()"); - if (this->mTrackBufferProvider != NULL) { + if (mTrackBufferProvider != NULL) { status_t res = mTrackBufferProvider->getNextBuffer(pBuffer, pts); if (res == OK) { mDownmixConfig.inputCfg.buffer.frameCount = pBuffer->frameCount; @@ -81,7 +81,7 @@ status_t AudioMixer::DownmixerBufferProvider::getNextBuffer(AudioBufferProvider: void AudioMixer::DownmixerBufferProvider::releaseBuffer(AudioBufferProvider::Buffer *pBuffer) { //ALOGV("DownmixerBufferProvider::releaseBuffer()"); - if (this->mTrackBufferProvider != NULL) { + if (mTrackBufferProvider != NULL) { mTrackBufferProvider->releaseBuffer(pBuffer); } else { ALOGE("DownmixerBufferProvider::releaseBuffer() error: NULL track buffer provider"); @@ -90,9 +90,9 @@ void AudioMixer::DownmixerBufferProvider::releaseBuffer(AudioBufferProvider::Buf // ---------------------------------------------------------------------------- -bool AudioMixer::isMultichannelCapable = false; +bool AudioMixer::sIsMultichannelCapable = false; -effect_descriptor_t AudioMixer::dwnmFxDesc; +effect_descriptor_t AudioMixer::sDwnmFxDesc; // Ensure mConfiguredNames bitmask is initialized properly on all architectures. // The value of 1 << x is undefined in C when x >= 32. @@ -113,8 +113,6 @@ AudioMixer::AudioMixer(size_t frameCount, uint32_t sampleRate, uint32_t maxNumTr // AudioMixer is not yet capable of multi-channel output beyond stereo ALOG_ASSERT(2 == MAX_NUM_CHANNELS, "bad MAX_NUM_CHANNELS %d", MAX_NUM_CHANNELS); - LocalClock lc; - pthread_once(&sOnceControl, &sInitRoutine); mState.enabledTracks= 0; @@ -136,27 +134,6 @@ AudioMixer::AudioMixer(size_t frameCount, uint32_t sampleRate, uint32_t maxNumTr t++; } - // find multichannel downmix effect if we have to play multichannel content - uint32_t numEffects = 0; - int ret = EffectQueryNumberEffects(&numEffects); - if (ret != 0) { - ALOGE("AudioMixer() error %d querying number of effects", ret); - return; - } - ALOGV("EffectQueryNumberEffects() numEffects=%d", numEffects); - - for (uint32_t i = 0 ; i < numEffects ; i++) { - if (EffectQueryEffect(i, &dwnmFxDesc) == 0) { - ALOGV("effect %d is called %s", i, dwnmFxDesc.name); - if (memcmp(&dwnmFxDesc.type, EFFECT_UIID_DOWNMIX, sizeof(effect_uuid_t)) == 0) { - ALOGI("found effect \"%s\" from %s", - dwnmFxDesc.name, dwnmFxDesc.implementor); - isMultichannelCapable = true; - break; - } - } - } - ALOGE_IF(!isMultichannelCapable, "unable to find downmix effect"); } AudioMixer::~AudioMixer() @@ -229,7 +206,7 @@ int AudioMixer::getTrackName(audio_channel_mask_t channelMask, int sessionId) void AudioMixer::invalidateState(uint32_t mask) { - if (mask) { + if (mask != 0) { mState.needsChanged |= mask; mState.hook = process__validate; } @@ -252,7 +229,7 @@ status_t AudioMixer::initTrackDownmix(track_t* pTrack, int trackNum, audio_chann return status; } -void AudioMixer::unprepareTrackForDownmix(track_t* pTrack, int trackName) { +void AudioMixer::unprepareTrackForDownmix(track_t* pTrack, int trackName __unused) { ALOGV("AudioMixer::unprepareTrackForDownmix(%d)", trackName); if (pTrack->downmixerBufferProvider != NULL) { @@ -276,13 +253,13 @@ status_t AudioMixer::prepareTrackForDownmix(track_t* pTrack, int trackName) DownmixerBufferProvider* pDbp = new DownmixerBufferProvider(); int32_t status; - if (!isMultichannelCapable) { + if (!sIsMultichannelCapable) { ALOGE("prepareTrackForDownmix(%d) fails: mixer doesn't support multichannel content", trackName); goto noDownmixForActiveTrack; } - if (EffectCreate(&dwnmFxDesc.uuid, + if (EffectCreate(&sDwnmFxDesc.uuid, pTrack->sessionId /*sessionId*/, -2 /*ioId not relevant here, using random value*/, &pDbp->mDownmixHandle/*pHandle*/) != 0) { ALOGE("prepareTrackForDownmix(%d) fails: error creating downmixer effect", trackName); @@ -560,14 +537,14 @@ bool AudioMixer::track_t::setResampler(uint32_t value, uint32_t devSampleRate) // Should have a way to distinguish tracks with static ratios vs. dynamic ratios. if (!((value == 44100 && devSampleRate == 48000) || (value == 48000 && devSampleRate == 44100))) { - quality = AudioResampler::LOW_QUALITY; + quality = AudioResampler::DYN_LOW_QUALITY; } else { quality = AudioResampler::DEFAULT_QUALITY; } resampler = AudioResampler::create( format, // the resampler sees the number of channels after the downmixer, if any - downmixerBufferProvider != NULL ? MAX_NUM_CHANNELS : channelCount, + (int) (downmixerBufferProvider != NULL ? MAX_NUM_CHANNELS : channelCount), devSampleRate, quality); resampler->setLocalTimeFreq(sLocalTimeFreq); } @@ -668,27 +645,29 @@ void AudioMixer::process__validate(state_t* state, int64_t pts) countActiveTracks++; track_t& t = state->tracks[i]; uint32_t n = 0; + // FIXME can overflow (mask is only 3 bits) n |= NEEDS_CHANNEL_1 + t.channelCount - 1; - n |= NEEDS_FORMAT_16; - n |= t.doesResample() ? NEEDS_RESAMPLE_ENABLED : NEEDS_RESAMPLE_DISABLED; + if (t.doesResample()) { + n |= NEEDS_RESAMPLE; + } if (t.auxLevel != 0 && t.auxBuffer != NULL) { - n |= NEEDS_AUX_ENABLED; + n |= NEEDS_AUX; } if (t.volumeInc[0]|t.volumeInc[1]) { volumeRamp = true; } else if (!t.doesResample() && t.volumeRL == 0) { - n |= NEEDS_MUTE_ENABLED; + n |= NEEDS_MUTE; } t.needs = n; - if ((n & NEEDS_MUTE__MASK) == NEEDS_MUTE_ENABLED) { + if (n & NEEDS_MUTE) { t.hook = track__nop; } else { - if ((n & NEEDS_AUX__MASK) == NEEDS_AUX_ENABLED) { + if (n & NEEDS_AUX) { all16BitsStereoNoResample = false; } - if ((n & NEEDS_RESAMPLE__MASK) == NEEDS_RESAMPLE_ENABLED) { + if (n & NEEDS_RESAMPLE) { all16BitsStereoNoResample = false; resampling = true; t.hook = track__genericResample; @@ -710,7 +689,7 @@ void AudioMixer::process__validate(state_t* state, int64_t pts) // select the processing hooks state->hook = process__nop; - if (countActiveTracks) { + if (countActiveTracks > 0) { if (resampling) { if (!state->outputTemp) { state->outputTemp = new int32_t[MAX_NUM_CHANNELS * state->frameCount]; @@ -746,16 +725,15 @@ void AudioMixer::process__validate(state_t* state, int64_t pts) // Now that the volume ramp has been done, set optimal state and // track hooks for subsequent mixer process - if (countActiveTracks) { + if (countActiveTracks > 0) { bool allMuted = true; uint32_t en = state->enabledTracks; while (en) { const int i = 31 - __builtin_clz(en); en &= ~(1<<i); track_t& t = state->tracks[i]; - if (!t.doesResample() && t.volumeRL == 0) - { - t.needs |= NEEDS_MUTE_ENABLED; + if (!t.doesResample() && t.volumeRL == 0) { + t.needs |= NEEDS_MUTE; t.hook = track__nop; } else { allMuted = false; @@ -806,8 +784,8 @@ void AudioMixer::track__genericResample(track_t* t, int32_t* out, size_t outFram } } -void AudioMixer::track__nop(track_t* t, int32_t* out, size_t outFrameCount, int32_t* temp, - int32_t* aux) +void AudioMixer::track__nop(track_t* t __unused, int32_t* out __unused, + size_t outFrameCount __unused, int32_t* temp __unused, int32_t* aux __unused) { } @@ -883,8 +861,8 @@ void AudioMixer::volumeStereo(track_t* t, int32_t* out, size_t frameCount, int32 } } -void AudioMixer::track__16BitsStereo(track_t* t, int32_t* out, size_t frameCount, int32_t* temp, - int32_t* aux) +void AudioMixer::track__16BitsStereo(track_t* t, int32_t* out, size_t frameCount, + int32_t* temp __unused, int32_t* aux) { const int16_t *in = static_cast<const int16_t *>(t->in); @@ -974,8 +952,8 @@ void AudioMixer::track__16BitsStereo(track_t* t, int32_t* out, size_t frameCount t->in = in; } -void AudioMixer::track__16BitsMono(track_t* t, int32_t* out, size_t frameCount, int32_t* temp, - int32_t* aux) +void AudioMixer::track__16BitsMono(track_t* t, int32_t* out, size_t frameCount, + int32_t* temp __unused, int32_t* aux) { const int16_t *in = static_cast<int16_t const *>(t->in); @@ -1154,7 +1132,7 @@ void AudioMixer::process__genericNoResampling(state_t* state, int64_t pts) track_t& t = state->tracks[i]; size_t outFrames = BLOCKSIZE; int32_t *aux = NULL; - if (CC_UNLIKELY((t.needs & NEEDS_AUX__MASK) == NEEDS_AUX_ENABLED)) { + if (CC_UNLIKELY(t.needs & NEEDS_AUX)) { aux = t.auxBuffer + numFrames; } while (outFrames) { @@ -1166,7 +1144,7 @@ void AudioMixer::process__genericNoResampling(state_t* state, int64_t pts) break; } size_t inFrames = (t.frameCount > outFrames)?outFrames:t.frameCount; - if (inFrames) { + if (inFrames > 0) { t.hook(&t, outTemp + (BLOCKSIZE-outFrames)*MAX_NUM_CHANNELS, inFrames, state->resampleTemp, aux); t.frameCount -= inFrames; @@ -1242,14 +1220,14 @@ void AudioMixer::process__genericResampling(state_t* state, int64_t pts) e1 &= ~(1<<i); track_t& t = state->tracks[i]; int32_t *aux = NULL; - if (CC_UNLIKELY((t.needs & NEEDS_AUX__MASK) == NEEDS_AUX_ENABLED)) { + if (CC_UNLIKELY(t.needs & NEEDS_AUX)) { aux = t.auxBuffer; } // this is a little goofy, on the resampling case we don't // acquire/release the buffers because it's done by // the resampler. - if ((t.needs & NEEDS_RESAMPLE__MASK) == NEEDS_RESAMPLE_ENABLED) { + if (t.needs & NEEDS_RESAMPLE) { t.resampler->setPTS(pts); t.hook(&t, outTemp, numFrames, state->resampleTemp, aux); } else { @@ -1449,8 +1427,9 @@ void AudioMixer::process__TwoTracks16BitsStereoNoResampling(state_t* state, int64_t AudioMixer::calculateOutputPTS(const track_t& t, int64_t basePTS, int outputFrameIndex) { - if (AudioBufferProvider::kInvalidPTS == basePTS) + if (AudioBufferProvider::kInvalidPTS == basePTS) { return AudioBufferProvider::kInvalidPTS; + } return basePTS + ((outputFrameIndex * sLocalTimeFreq) / t.sampleRate); } @@ -1462,6 +1441,28 @@ int64_t AudioMixer::calculateOutputPTS(const track_t& t, int64_t basePTS, { LocalClock lc; sLocalTimeFreq = lc.getLocalFreq(); + + // find multichannel downmix effect if we have to play multichannel content + uint32_t numEffects = 0; + int ret = EffectQueryNumberEffects(&numEffects); + if (ret != 0) { + ALOGE("AudioMixer() error %d querying number of effects", ret); + return; + } + ALOGV("EffectQueryNumberEffects() numEffects=%d", numEffects); + + for (uint32_t i = 0 ; i < numEffects ; i++) { + if (EffectQueryEffect(i, &sDwnmFxDesc) == 0) { + ALOGV("effect %d is called %s", i, sDwnmFxDesc.name); + if (memcmp(&sDwnmFxDesc.type, EFFECT_UIID_DOWNMIX, sizeof(effect_uuid_t)) == 0) { + ALOGI("found effect \"%s\" from %s", + sDwnmFxDesc.name, sDwnmFxDesc.implementor); + sIsMultichannelCapable = true; + break; + } + } + } + ALOGW_IF(!sIsMultichannelCapable, "unable to find downmix effect"); } // ---------------------------------------------------------------------------- diff --git a/services/audioflinger/AudioMixer.h b/services/audioflinger/AudioMixer.h index 43aeb86..d286986 100644 --- a/services/audioflinger/AudioMixer.h +++ b/services/audioflinger/AudioMixer.h @@ -120,27 +120,19 @@ public: private: enum { + // FIXME this representation permits up to 8 channels NEEDS_CHANNEL_COUNT__MASK = 0x00000007, - NEEDS_FORMAT__MASK = 0x000000F0, - NEEDS_MUTE__MASK = 0x00000100, - NEEDS_RESAMPLE__MASK = 0x00001000, - NEEDS_AUX__MASK = 0x00010000, }; enum { - NEEDS_CHANNEL_1 = 0x00000000, - NEEDS_CHANNEL_2 = 0x00000001, + NEEDS_CHANNEL_1 = 0x00000000, // mono + NEEDS_CHANNEL_2 = 0x00000001, // stereo - NEEDS_FORMAT_16 = 0x00000010, + // sample format is not explicitly specified, and is assumed to be AUDIO_FORMAT_PCM_16_BIT - NEEDS_MUTE_DISABLED = 0x00000000, - NEEDS_MUTE_ENABLED = 0x00000100, - - NEEDS_RESAMPLE_DISABLED = 0x00000000, - NEEDS_RESAMPLE_ENABLED = 0x00001000, - - NEEDS_AUX_DISABLED = 0x00000000, - NEEDS_AUX_ENABLED = 0x00010000, + NEEDS_MUTE = 0x00000100, + NEEDS_RESAMPLE = 0x00001000, + NEEDS_AUX = 0x00010000, }; struct state_t; @@ -224,7 +216,7 @@ private: NBLog::Writer* mLog; int32_t reserved[1]; // FIXME allocate dynamically to save some memory when maxNumTracks < MAX_NUM_TRACKS - track_t tracks[MAX_NUM_TRACKS]; __attribute__((aligned(32))); + track_t tracks[MAX_NUM_TRACKS] __attribute__((aligned(32))); }; // AudioBufferProvider that wraps a track AudioBufferProvider by a call to a downmix effect @@ -256,9 +248,9 @@ private: state_t mState __attribute__((aligned(32))); // effect descriptor for the downmixer used by the mixer - static effect_descriptor_t dwnmFxDesc; + static effect_descriptor_t sDwnmFxDesc; // indicates whether a downmix effect has been found and is usable by this mixer - static bool isMultichannelCapable; + static bool sIsMultichannelCapable; // Call after changing either the enabled status of a track, or parameters of an enabled track. // OK to call more often than that, but unnecessary. diff --git a/services/audioflinger/AudioPolicyService.cpp b/services/audioflinger/AudioPolicyService.cpp index 646a317..9980344 100644 --- a/services/audioflinger/AudioPolicyService.cpp +++ b/services/audioflinger/AudioPolicyService.cpp @@ -60,7 +60,7 @@ namespace { // ---------------------------------------------------------------------------- AudioPolicyService::AudioPolicyService() - : BnAudioPolicyService() , mpAudioPolicyDev(NULL) , mpAudioPolicy(NULL) + : BnAudioPolicyService(), mpAudioPolicyDev(NULL), mpAudioPolicy(NULL) { char value[PROPERTY_VALUE_MAX]; const struct hw_module_t *module; @@ -77,24 +77,28 @@ AudioPolicyService::AudioPolicyService() mOutputCommandThread = new AudioCommandThread(String8("ApmOutput"), this); /* instantiate the audio policy manager */ rc = hw_get_module(AUDIO_POLICY_HARDWARE_MODULE_ID, &module); - if (rc) + if (rc) { return; + } rc = audio_policy_dev_open(module, &mpAudioPolicyDev); ALOGE_IF(rc, "couldn't open audio policy device (%s)", strerror(-rc)); - if (rc) + if (rc) { return; + } rc = mpAudioPolicyDev->create_audio_policy(mpAudioPolicyDev, &aps_ops, this, &mpAudioPolicy); ALOGE_IF(rc, "couldn't create audio policy (%s)", strerror(-rc)); - if (rc) + if (rc) { return; + } rc = mpAudioPolicy->init_check(mpAudioPolicy); ALOGE_IF(rc, "couldn't init_check the audio policy (%s)", strerror(-rc)); - if (rc) + if (rc) { return; + } ALOGI("Loaded audio policy from %s (%s)", module->name, module->id); @@ -126,10 +130,12 @@ AudioPolicyService::~AudioPolicyService() } mInputs.clear(); - if (mpAudioPolicy != NULL && mpAudioPolicyDev != NULL) + if (mpAudioPolicy != NULL && mpAudioPolicyDev != NULL) { mpAudioPolicyDev->destroy_audio_policy(mpAudioPolicyDev, mpAudioPolicy); - if (mpAudioPolicyDev != NULL) + } + if (mpAudioPolicyDev != NULL) { audio_policy_dev_close(mpAudioPolicyDev); + } } status_t AudioPolicyService::setDeviceConnectionState(audio_devices_t device, @@ -469,8 +475,9 @@ audio_devices_t AudioPolicyService::getDevicesForStream(audio_stream_type_t stre audio_io_handle_t AudioPolicyService::getOutputForEffect(const effect_descriptor_t *desc) { + // FIXME change return type to status_t, and return NO_INIT here if (mpAudioPolicy == NULL) { - return NO_INIT; + return 0; } Mutex::Autolock _l(mLock); return mpAudioPolicy->get_output_for_effect(mpAudioPolicy, desc); @@ -606,7 +613,7 @@ status_t AudioPolicyService::dumpInternals(int fd) return NO_ERROR; } -status_t AudioPolicyService::dump(int fd, const Vector<String16>& args) +status_t AudioPolicyService::dump(int fd, const Vector<String16>& args __unused) { if (!dumpAllowed()) { dumpPermissionDenial(fd); @@ -1114,11 +1121,13 @@ int AudioPolicyService::setStreamVolume(audio_stream_type_t stream, int AudioPolicyService::startTone(audio_policy_tone_t tone, audio_stream_type_t stream) { - if (tone != AUDIO_POLICY_TONE_IN_CALL_NOTIFICATION) + if (tone != AUDIO_POLICY_TONE_IN_CALL_NOTIFICATION) { ALOGE("startTone: illegal tone requested (%d)", tone); - if (stream != AUDIO_STREAM_VOICE_CALL) + } + if (stream != AUDIO_STREAM_VOICE_CALL) { ALOGE("startTone: illegal stream (%d) requested for tone %d", stream, tone); + } mTonePlaybackThread->startToneCommand(ToneGenerator::TONE_SUP_CALL_WAITING, AUDIO_STREAM_VOICE_CALL); return 0; @@ -1452,7 +1461,7 @@ status_t AudioPolicyService::loadPreProcessorConfig(const char *path) extern "C" { -static audio_module_handle_t aps_load_hw_module(void *service, +static audio_module_handle_t aps_load_hw_module(void *service __unused, const char *name) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); @@ -1465,7 +1474,7 @@ static audio_module_handle_t aps_load_hw_module(void *service, } // deprecated: replaced by aps_open_output_on_module() -static audio_io_handle_t aps_open_output(void *service, +static audio_io_handle_t aps_open_output(void *service __unused, audio_devices_t *pDevices, uint32_t *pSamplingRate, audio_format_t *pFormat, @@ -1483,7 +1492,7 @@ static audio_io_handle_t aps_open_output(void *service, pLatencyMs, flags); } -static audio_io_handle_t aps_open_output_on_module(void *service, +static audio_io_handle_t aps_open_output_on_module(void *service __unused, audio_module_handle_t module, audio_devices_t *pDevices, uint32_t *pSamplingRate, @@ -1502,7 +1511,7 @@ static audio_io_handle_t aps_open_output_on_module(void *service, pLatencyMs, flags, offloadInfo); } -static audio_io_handle_t aps_open_dup_output(void *service, +static audio_io_handle_t aps_open_dup_output(void *service __unused, audio_io_handle_t output1, audio_io_handle_t output2) { @@ -1514,16 +1523,17 @@ static audio_io_handle_t aps_open_dup_output(void *service, return af->openDuplicateOutput(output1, output2); } -static int aps_close_output(void *service, audio_io_handle_t output) +static int aps_close_output(void *service __unused, audio_io_handle_t output) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); - if (af == 0) + if (af == 0) { return PERMISSION_DENIED; + } return af->closeOutput(output); } -static int aps_suspend_output(void *service, audio_io_handle_t output) +static int aps_suspend_output(void *service __unused, audio_io_handle_t output) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); if (af == 0) { @@ -1534,7 +1544,7 @@ static int aps_suspend_output(void *service, audio_io_handle_t output) return af->suspendOutput(output); } -static int aps_restore_output(void *service, audio_io_handle_t output) +static int aps_restore_output(void *service __unused, audio_io_handle_t output) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); if (af == 0) { @@ -1546,12 +1556,12 @@ static int aps_restore_output(void *service, audio_io_handle_t output) } // deprecated: replaced by aps_open_input_on_module(), and acoustics parameter is ignored -static audio_io_handle_t aps_open_input(void *service, +static audio_io_handle_t aps_open_input(void *service __unused, audio_devices_t *pDevices, uint32_t *pSamplingRate, audio_format_t *pFormat, audio_channel_mask_t *pChannelMask, - audio_in_acoustics_t acoustics) + audio_in_acoustics_t acoustics __unused) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); if (af == 0) { @@ -1562,7 +1572,7 @@ static audio_io_handle_t aps_open_input(void *service, return af->openInput((audio_module_handle_t)0, pDevices, pSamplingRate, pFormat, pChannelMask); } -static audio_io_handle_t aps_open_input_on_module(void *service, +static audio_io_handle_t aps_open_input_on_module(void *service __unused, audio_module_handle_t module, audio_devices_t *pDevices, uint32_t *pSamplingRate, @@ -1578,37 +1588,40 @@ static audio_io_handle_t aps_open_input_on_module(void *service, return af->openInput(module, pDevices, pSamplingRate, pFormat, pChannelMask); } -static int aps_close_input(void *service, audio_io_handle_t input) +static int aps_close_input(void *service __unused, audio_io_handle_t input) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); - if (af == 0) + if (af == 0) { return PERMISSION_DENIED; + } return af->closeInput(input); } -static int aps_set_stream_output(void *service, audio_stream_type_t stream, +static int aps_set_stream_output(void *service __unused, audio_stream_type_t stream, audio_io_handle_t output) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); - if (af == 0) + if (af == 0) { return PERMISSION_DENIED; + } return af->setStreamOutput(stream, output); } -static int aps_move_effects(void *service, int session, +static int aps_move_effects(void *service __unused, int session, audio_io_handle_t src_output, audio_io_handle_t dst_output) { sp<IAudioFlinger> af = AudioSystem::get_audio_flinger(); - if (af == 0) + if (af == 0) { return PERMISSION_DENIED; + } return af->moveEffects(session, src_output, dst_output); } -static char * aps_get_parameters(void *service, audio_io_handle_t io_handle, +static char * aps_get_parameters(void *service __unused, audio_io_handle_t io_handle, const char *keys) { String8 result = AudioSystem::getParameters(io_handle, String8(keys)); @@ -1659,24 +1672,24 @@ static int aps_set_voice_volume(void *service, float volume, int delay_ms) namespace { struct audio_policy_service_ops aps_ops = { - open_output : aps_open_output, - open_duplicate_output : aps_open_dup_output, - close_output : aps_close_output, - suspend_output : aps_suspend_output, - restore_output : aps_restore_output, - open_input : aps_open_input, - close_input : aps_close_input, - set_stream_volume : aps_set_stream_volume, - set_stream_output : aps_set_stream_output, - set_parameters : aps_set_parameters, - get_parameters : aps_get_parameters, - start_tone : aps_start_tone, - stop_tone : aps_stop_tone, - set_voice_volume : aps_set_voice_volume, - move_effects : aps_move_effects, - load_hw_module : aps_load_hw_module, - open_output_on_module : aps_open_output_on_module, - open_input_on_module : aps_open_input_on_module, + .open_output = aps_open_output, + .open_duplicate_output = aps_open_dup_output, + .close_output = aps_close_output, + .suspend_output = aps_suspend_output, + .restore_output = aps_restore_output, + .open_input = aps_open_input, + .close_input = aps_close_input, + .set_stream_volume = aps_set_stream_volume, + .set_stream_output = aps_set_stream_output, + .set_parameters = aps_set_parameters, + .get_parameters = aps_get_parameters, + .start_tone = aps_start_tone, + .stop_tone = aps_stop_tone, + .set_voice_volume = aps_set_voice_volume, + .move_effects = aps_move_effects, + .load_hw_module = aps_load_hw_module, + .open_output_on_module = aps_open_output_on_module, + .open_input_on_module = aps_open_input_on_module, }; }; // namespace <unnamed> diff --git a/services/audioflinger/AudioResampler.cpp b/services/audioflinger/AudioResampler.cpp index e5cceb1..b206116 100644 --- a/services/audioflinger/AudioResampler.cpp +++ b/services/audioflinger/AudioResampler.cpp @@ -25,6 +25,7 @@ #include "AudioResampler.h" #include "AudioResamplerSinc.h" #include "AudioResamplerCubic.h" +#include "AudioResamplerDyn.h" #ifdef __arm__ #include <machine/cpu-features.h> @@ -77,6 +78,9 @@ private: int mX0R; }; +/*static*/ +const double AudioResampler::kPhaseMultiplier = 1L << AudioResampler::kNumPhaseBits; + bool AudioResampler::qualityIsSupported(src_quality quality) { switch (quality) { @@ -85,6 +89,9 @@ bool AudioResampler::qualityIsSupported(src_quality quality) case MED_QUALITY: case HIGH_QUALITY: case VERY_HIGH_QUALITY: + case DYN_LOW_QUALITY: + case DYN_MED_QUALITY: + case DYN_HIGH_QUALITY: return true; default: return false; @@ -105,7 +112,7 @@ void AudioResampler::init_routine() if (*endptr == '\0') { defaultQuality = (src_quality) l; ALOGD("forcing AudioResampler quality to %d", defaultQuality); - if (defaultQuality < DEFAULT_QUALITY || defaultQuality > VERY_HIGH_QUALITY) { + if (defaultQuality < DEFAULT_QUALITY || defaultQuality > DYN_HIGH_QUALITY) { defaultQuality = DEFAULT_QUALITY; } } @@ -125,6 +132,12 @@ uint32_t AudioResampler::qualityMHz(src_quality quality) return 20; case VERY_HIGH_QUALITY: return 34; + case DYN_LOW_QUALITY: + return 4; + case DYN_MED_QUALITY: + return 6; + case DYN_HIGH_QUALITY: + return 12; } } @@ -148,6 +161,16 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, atFinalQuality = true; } + /* if the caller requests DEFAULT_QUALITY and af.resampler.property + * has not been set, the target resampler quality is set to DYN_MED_QUALITY, + * and allowed to "throttle" down to DYN_LOW_QUALITY if necessary + * due to estimated CPU load of having too many active resamplers + * (the code below the if). + */ + if (quality == DEFAULT_QUALITY) { + quality = DYN_MED_QUALITY; + } + // naive implementation of CPU load throttling doesn't account for whether resampler is active pthread_mutex_lock(&mutex); for (;;) { @@ -162,7 +185,6 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, // not enough CPU available for proposed quality level, so try next lowest level switch (quality) { default: - case DEFAULT_QUALITY: case LOW_QUALITY: atFinalQuality = true; break; @@ -175,6 +197,15 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, case VERY_HIGH_QUALITY: quality = HIGH_QUALITY; break; + case DYN_LOW_QUALITY: + atFinalQuality = true; + break; + case DYN_MED_QUALITY: + quality = DYN_LOW_QUALITY; + break; + case DYN_HIGH_QUALITY: + quality = DYN_MED_QUALITY; + break; } } pthread_mutex_unlock(&mutex); @@ -183,7 +214,6 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, switch (quality) { default: - case DEFAULT_QUALITY: case LOW_QUALITY: ALOGV("Create linear Resampler"); resampler = new AudioResamplerOrder1(bitDepth, inChannelCount, sampleRate); @@ -200,6 +230,12 @@ AudioResampler* AudioResampler::create(int bitDepth, int inChannelCount, ALOGV("Create VERY_HIGH_QUALITY sinc Resampler = %d", quality); resampler = new AudioResamplerSinc(bitDepth, inChannelCount, sampleRate, quality); break; + case DYN_LOW_QUALITY: + case DYN_MED_QUALITY: + case DYN_HIGH_QUALITY: + ALOGV("Create dynamic Resampler = %d", quality); + resampler = new AudioResamplerDyn(bitDepth, inChannelCount, sampleRate, quality); + break; } // initialize resampler @@ -339,8 +375,9 @@ void AudioResamplerOrder1::resampleStereo16(int32_t* out, size_t outFrameCount, out[outputIndex++] += vl * Interp(mX0L, in[0], phaseFraction); out[outputIndex++] += vr * Interp(mX0R, in[1], phaseFraction); Advance(&inputIndex, &phaseFraction, phaseIncrement); - if (outputIndex == outputSampleCount) + if (outputIndex == outputSampleCount) { break; + } } // process input samples @@ -434,8 +471,9 @@ void AudioResamplerOrder1::resampleMono16(int32_t* out, size_t outFrameCount, out[outputIndex++] += vl * sample; out[outputIndex++] += vr * sample; Advance(&inputIndex, &phaseFraction, phaseIncrement); - if (outputIndex == outputSampleCount) + if (outputIndex == outputSampleCount) { break; + } } // process input samples @@ -514,6 +552,16 @@ void AudioResamplerOrder1::AsmMono16Loop(int16_t *in, int32_t* maxOutPt, int32_t size_t &outputIndex, int32_t* out, size_t &inputIndex, int32_t vl, int32_t vr, uint32_t &phaseFraction, uint32_t phaseIncrement) { + (void)maxOutPt; // remove unused parameter warnings + (void)maxInIdx; + (void)outputIndex; + (void)out; + (void)inputIndex; + (void)vl; + (void)vr; + (void)phaseFraction; + (void)phaseIncrement; + (void)in; #define MO_PARAM5 "36" // offset of parameter 5 (outputIndex) asm( @@ -625,6 +673,16 @@ void AudioResamplerOrder1::AsmStereo16Loop(int16_t *in, int32_t* maxOutPt, int32 size_t &outputIndex, int32_t* out, size_t &inputIndex, int32_t vl, int32_t vr, uint32_t &phaseFraction, uint32_t phaseIncrement) { + (void)maxOutPt; // remove unused parameter warnings + (void)maxInIdx; + (void)outputIndex; + (void)out; + (void)inputIndex; + (void)vl; + (void)vr; + (void)phaseFraction; + (void)phaseIncrement; + (void)in; #define ST_PARAM5 "40" // offset of parameter 5 (outputIndex) asm( "stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}\n" diff --git a/services/audioflinger/AudioResampler.h b/services/audioflinger/AudioResampler.h index 33e64ce..dc33f29 100644 --- a/services/audioflinger/AudioResampler.h +++ b/services/audioflinger/AudioResampler.h @@ -41,6 +41,9 @@ public: MED_QUALITY=2, HIGH_QUALITY=3, VERY_HIGH_QUALITY=4, + DYN_LOW_QUALITY=5, + DYN_MED_QUALITY=6, + DYN_HIGH_QUALITY=7, }; static AudioResampler* create(int bitDepth, int inChannelCount, @@ -81,7 +84,7 @@ protected: static const uint32_t kPhaseMask = (1LU<<kNumPhaseBits)-1; // multiplier to calculate fixed point phase increment - static const double kPhaseMultiplier = 1L << kNumPhaseBits; + static const double kPhaseMultiplier; AudioResampler(int bitDepth, int inChannelCount, int32_t sampleRate, src_quality quality); diff --git a/services/audioflinger/AudioResamplerCubic.cpp b/services/audioflinger/AudioResamplerCubic.cpp index 18e59e9..1f9714b 100644 --- a/services/audioflinger/AudioResamplerCubic.cpp +++ b/services/audioflinger/AudioResamplerCubic.cpp @@ -66,8 +66,9 @@ void AudioResamplerCubic::resampleStereo16(int32_t* out, size_t outFrameCount, if (mBuffer.frameCount == 0) { mBuffer.frameCount = inFrameCount; provider->getNextBuffer(&mBuffer, mPTS); - if (mBuffer.raw == NULL) + if (mBuffer.raw == NULL) { return; + } // ALOGW("New buffer: offset=%p, frames=%dn", mBuffer.raw, mBuffer.frameCount); } int16_t *in = mBuffer.i16; @@ -97,8 +98,9 @@ void AudioResamplerCubic::resampleStereo16(int32_t* out, size_t outFrameCount, mBuffer.frameCount = inFrameCount; provider->getNextBuffer(&mBuffer, calculateOutputPTS(outputIndex / 2)); - if (mBuffer.raw == NULL) + if (mBuffer.raw == NULL) { goto save_state; // ugly, but efficient + } in = mBuffer.i16; // ALOGW("New buffer: offset=%p, frames=%d", mBuffer.raw, mBuffer.frameCount); } @@ -132,8 +134,9 @@ void AudioResamplerCubic::resampleMono16(int32_t* out, size_t outFrameCount, if (mBuffer.frameCount == 0) { mBuffer.frameCount = inFrameCount; provider->getNextBuffer(&mBuffer, mPTS); - if (mBuffer.raw == NULL) + if (mBuffer.raw == NULL) { return; + } // ALOGW("New buffer: offset=%p, frames=%d", mBuffer.raw, mBuffer.frameCount); } int16_t *in = mBuffer.i16; @@ -163,8 +166,9 @@ void AudioResamplerCubic::resampleMono16(int32_t* out, size_t outFrameCount, mBuffer.frameCount = inFrameCount; provider->getNextBuffer(&mBuffer, calculateOutputPTS(outputIndex / 2)); - if (mBuffer.raw == NULL) + if (mBuffer.raw == NULL) { goto save_state; // ugly, but efficient + } // ALOGW("New buffer: offset=%p, frames=%dn", mBuffer.raw, mBuffer.frameCount); in = mBuffer.i16; } diff --git a/services/audioflinger/AudioResamplerDyn.cpp b/services/audioflinger/AudioResamplerDyn.cpp new file mode 100644 index 0000000..cd67df5 --- /dev/null +++ b/services/audioflinger/AudioResamplerDyn.cpp @@ -0,0 +1,547 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define LOG_TAG "AudioResamplerDyn" +//#define LOG_NDEBUG 0 + +#include <malloc.h> +#include <string.h> +#include <stdlib.h> +#include <dlfcn.h> +#include <math.h> + +#include <cutils/compiler.h> +#include <cutils/properties.h> +#include <utils/Log.h> + +#include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here +#include "AudioResamplerFirProcess.h" +#include "AudioResamplerFirProcessNeon.h" +#include "AudioResamplerFirGen.h" // requires math.h +#include "AudioResamplerDyn.h" + +//#define DEBUG_RESAMPLER + +namespace android { + +// generate a unique resample type compile-time constant (constexpr) +#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \ + ((((CHANNELS)-1)&1) | !!(LOCKED)<<1 | (COEFTYPE)<<2 \ + | ((STRIDE)==8 ? 1 : (STRIDE)==16 ? 2 : 0)<<3) + +/* + * InBuffer is a type agnostic input buffer. + * + * Layout of the state buffer for halfNumCoefs=8. + * + * [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr] + * S I R + * + * S = mState + * I = mImpulse + * R = mRingFull + * p = past samples, convoluted with the (p)ositive side of sinc() + * n = future samples, convoluted with the (n)egative side of sinc() + * r = extra space for implementing the ring buffer + */ + +template<typename TI> +AudioResamplerDyn::InBuffer<TI>::InBuffer() + : mState(NULL), mImpulse(NULL), mRingFull(NULL), mStateSize(0) { +} + +template<typename TI> +AudioResamplerDyn::InBuffer<TI>::~InBuffer() { + init(); +} + +template<typename TI> +void AudioResamplerDyn::InBuffer<TI>::init() { + free(mState); + mState = NULL; + mImpulse = NULL; + mRingFull = NULL; + mStateSize = 0; +} + +// resizes the state buffer to accommodate the appropriate filter length +template<typename TI> +void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) { + // calculate desired state size + int stateSize = halfNumCoefs * CHANNELS * 2 + * kStateSizeMultipleOfFilterLength; + + // check if buffer needs resizing + if (mState + && stateSize == mStateSize + && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) { + return; + } + + // create new buffer + TI* state = (int16_t*)memalign(32, stateSize*sizeof(*state)); + memset(state, 0, stateSize*sizeof(*state)); + + // attempt to preserve state + if (mState) { + TI* srcLo = mImpulse - halfNumCoefs*CHANNELS; + TI* srcHi = mImpulse + halfNumCoefs*CHANNELS; + TI* dst = state; + + if (srcLo < mState) { + dst += mState-srcLo; + srcLo = mState; + } + if (srcHi > mState + mStateSize) { + srcHi = mState + mStateSize; + } + memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo)); + free(mState); + } + + // set class member vars + mState = state; + mStateSize = stateSize; + mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed + mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS; +} + +// copy in the input data into the head (impulse+halfNumCoefs) of the buffer. +template<typename TI> +template<int CHANNELS> +void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex) { + int16_t* head = impulse + halfNumCoefs*CHANNELS; + for (size_t i=0 ; i<CHANNELS ; i++) { + head[i] = in[inputIndex*CHANNELS + i]; + } +} + +// advance the impulse pointer, and load in data into the head (impulse+halfNumCoefs) +template<typename TI> +template<int CHANNELS> +void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex) { + impulse += CHANNELS; + + if (CC_UNLIKELY(impulse >= mRingFull)) { + const size_t shiftDown = mRingFull - mState - halfNumCoefs*CHANNELS; + memcpy(mState, mState+shiftDown, halfNumCoefs*CHANNELS*2*sizeof(TI)); + impulse -= shiftDown; + } + readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex); +} + +void AudioResamplerDyn::Constants::set( + int L, int halfNumCoefs, int inSampleRate, int outSampleRate) +{ + int bits = 0; + int lscale = inSampleRate/outSampleRate < 2 ? L - 1 : + static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate); + for (int i=lscale; i; ++bits, i>>=1) + ; + mL = L; + mShift = kNumPhaseBits - bits; + mHalfNumCoefs = halfNumCoefs; +} + +AudioResamplerDyn::AudioResamplerDyn(int bitDepth, + int inChannelCount, int32_t sampleRate, src_quality quality) + : AudioResampler(bitDepth, inChannelCount, sampleRate, quality), + mResampleType(0), mFilterSampleRate(0), mFilterQuality(DEFAULT_QUALITY), + mCoefBuffer(NULL) +{ + mVolumeSimd[0] = mVolumeSimd[1] = 0; + mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better +} + +AudioResamplerDyn::~AudioResamplerDyn() { + free(mCoefBuffer); +} + +void AudioResamplerDyn::init() { + mFilterSampleRate = 0; // always trigger new filter generation + mInBuffer.init(); +} + +void AudioResamplerDyn::setVolume(int16_t left, int16_t right) { + AudioResampler::setVolume(left, right); + mVolumeSimd[0] = static_cast<int32_t>(left)<<16; + mVolumeSimd[1] = static_cast<int32_t>(right)<<16; +} + +template <typename T> T max(T a, T b) {return a > b ? a : b;} + +template <typename T> T absdiff(T a, T b) {return a > b ? a - b : b - a;} + +template<typename T> +void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten, + int inSampleRate, int outSampleRate, double tbwCheat) { + T* buf = reinterpret_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T))); + static const double atten = 0.9998; // to avoid ripple overflow + double fcr; + double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten); + + if (inSampleRate < outSampleRate) { // upsample + fcr = max(0.5*tbwCheat - tbw/2, tbw/2); + } else { // downsample + fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2); + } + // create and set filter + firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten); + c.setBuf(buf); + if (mCoefBuffer) { + free(mCoefBuffer); + } + mCoefBuffer = buf; +#ifdef DEBUG_RESAMPLER + // print basic filter stats + printf("L:%d hnc:%d stopBandAtten:%lf fcr:%lf atten:%lf tbw:%lf\n", + c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw); + // test the filter and report results + double fp = (fcr - tbw/2)/c.mL; + double fs = (fcr + tbw/2)/c.mL; + double passMin, passMax, passRipple; + double stopMax, stopRipple; + testFir(buf, c.mL, c.mHalfNumCoefs, fp, fs, /*passSteps*/ 1000, /*stopSteps*/ 100000, + passMin, passMax, passRipple, stopMax, stopRipple); + printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, passMin, passMax, passRipple); + printf("stopband(%lf, %lf): %.8lf %.3lf\n", fs, 0.5, stopMax, stopRipple); +#endif +} + +// recursive gcd. Using objdump, it appears the tail recursion is converted to a while loop. +static int gcd(int n, int m) { + if (m == 0) { + return n; + } + return gcd(m, n % m); +} + +static bool isClose(int32_t newSampleRate, int32_t prevSampleRate, + int32_t filterSampleRate, int32_t outSampleRate) { + + // different upsampling ratios do not need a filter change. + if (filterSampleRate != 0 + && filterSampleRate < outSampleRate + && newSampleRate < outSampleRate) + return true; + + // check design criteria again if downsampling is detected. + int pdiff = absdiff(newSampleRate, prevSampleRate); + int adiff = absdiff(newSampleRate, filterSampleRate); + + // allow up to 6% relative change increments. + // allow up to 12% absolute change increments (from filter design) + return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3; +} + +void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) { + if (mInSampleRate == inSampleRate) { + return; + } + int32_t oldSampleRate = mInSampleRate; + int32_t oldHalfNumCoefs = mConstants.mHalfNumCoefs; + uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift; + bool useS32 = false; + + mInSampleRate = inSampleRate; + + // TODO: Add precalculated Equiripple filters + + if (mFilterQuality != getQuality() || + !isClose(inSampleRate, oldSampleRate, mFilterSampleRate, mSampleRate)) { + mFilterSampleRate = inSampleRate; + mFilterQuality = getQuality(); + + // Begin Kaiser Filter computation + // + // The quantization floor for S16 is about 96db - 10*log_10(#length) + 3dB. + // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters + // + // For s32 we keep the stop band attenuation at the same as 16b resolution, about + // 96-98dB + // + + double stopBandAtten; + double tbwCheat = 1.; // how much we "cheat" into aliasing + int halfLength; + if (mFilterQuality == DYN_HIGH_QUALITY) { + // 32b coefficients, 64 length + useS32 = true; + stopBandAtten = 98.; + halfLength = 32; + } else if (mFilterQuality == DYN_LOW_QUALITY) { + // 16b coefficients, 16-32 length + useS32 = false; + stopBandAtten = 80.; + if (mSampleRate >= inSampleRate * 2) { + halfLength = 16; + } else { + halfLength = 8; + } + if (mSampleRate >= inSampleRate) { + tbwCheat = 1.05; + } else { + tbwCheat = 1.03; + } + } else { // DYN_MED_QUALITY + // 16b coefficients, 32-64 length + // note: > 64 length filters with 16b coefs can have quantization noise problems + useS32 = false; + stopBandAtten = 84.; + if (mSampleRate >= inSampleRate * 4) { + halfLength = 32; + } else if (mSampleRate >= inSampleRate * 2) { + halfLength = 24; + } else { + halfLength = 16; + } + if (mSampleRate >= inSampleRate) { + tbwCheat = 1.03; + } else { + tbwCheat = 1.01; + } + } + + // determine the number of polyphases in the filterbank. + // for 16b, it is desirable to have 2^(16/2) = 256 phases. + // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html + // + // We are a bit more lax on this. + + int phases = mSampleRate / gcd(mSampleRate, inSampleRate); + + // TODO: Once dynamic sample rate change is an option, the code below + // should be modified to execute only when dynamic sample rate change is enabled. + // + // as above, #phases less than 63 is too few phases for accurate linear interpolation. + // we increase the phases to compensate, but more phases means more memory per + // filter and more time to compute the filter. + // + // if we know that the filter will be used for dynamic sample rate changes, + // that would allow us skip this part for fixed sample rate resamplers. + // + while (phases<63) { + phases *= 2; // this code only needed to support dynamic rate changes + } + + if (phases>=256) { // too many phases, always interpolate + phases = 127; + } + + // create the filter + mConstants.set(phases, halfLength, inSampleRate, mSampleRate); + if (useS32) { + createKaiserFir<int32_t>(mConstants, stopBandAtten, + inSampleRate, mSampleRate, tbwCheat); + } else { + createKaiserFir<int16_t>(mConstants, stopBandAtten, + inSampleRate, mSampleRate, tbwCheat); + } + } // End Kaiser filter + + // update phase and state based on the new filter. + const Constants& c(mConstants); + mInBuffer.resize(mChannelCount, c.mHalfNumCoefs); + const uint32_t phaseWrapLimit = c.mL << c.mShift; + // try to preserve as much of the phase fraction as possible for on-the-fly changes + mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction) + * phaseWrapLimit / oldPhaseWrapLimit; + mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case. + mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit) + * inSampleRate / mSampleRate); + + // determine which resampler to use + // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits") + int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0; + int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2; + if (locked) { + mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase + } + + mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32); +#ifdef DEBUG_RESAMPLER + printf("channels:%d %s stride:%d %s coef:%d shift:%d\n", + mChannelCount, locked ? "locked" : "interpolated", + stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift); +#endif +} + +void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount, + AudioBufferProvider* provider) +{ + // TODO: + // 24 cases - this perhaps can be reduced later, as testing might take too long + switch (mResampleType) { + + // stride 16 (falls back to stride 2 for machines that do not support NEON) + case RESAMPLETYPE(1, true, 16, 0): + return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, true, 16, 0): + return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, false, 16, 0): + return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, false, 16, 0): + return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, true, 16, 1): + return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, true, 16, 1): + return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(1, false, 16, 1): + return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, false, 16, 1): + return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider); +#if 0 + // TODO: Remove these? + // stride 8 + case RESAMPLETYPE(1, true, 8, 0): + return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, true, 8, 0): + return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, false, 8, 0): + return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, false, 8, 0): + return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, true, 8, 1): + return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, true, 8, 1): + return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(1, false, 8, 1): + return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, false, 8, 1): + return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + // stride 2 (can handle any filter length) + case RESAMPLETYPE(1, true, 2, 0): + return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, true, 2, 0): + return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, false, 2, 0): + return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(2, false, 2, 0): + return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider); + case RESAMPLETYPE(1, true, 2, 1): + return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, true, 2, 1): + return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(1, false, 2, 1): + return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); + case RESAMPLETYPE(2, false, 2, 1): + return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider); +#endif + default: + ; // error + } +} + +template<int CHANNELS, bool LOCKED, int STRIDE, typename TC> +void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount, + const TC* const coefs, AudioBufferProvider* provider) +{ + const Constants& c(mConstants); + int16_t* impulse = mInBuffer.getImpulse(); + size_t inputIndex = mInputIndex; + uint32_t phaseFraction = mPhaseFraction; + const uint32_t phaseIncrement = mPhaseIncrement; + size_t outputIndex = 0; + size_t outputSampleCount = outFrameCount * 2; // stereo output + size_t inFrameCount = (outFrameCount*mInSampleRate)/mSampleRate; + const uint32_t phaseWrapLimit = c.mL << c.mShift; + + // NOTE: be very careful when modifying the code here. register + // pressure is very high and a small change might cause the compiler + // to generate far less efficient code. + // Always sanity check the result with objdump or test-resample. + + // the following logic is a bit convoluted to keep the main processing loop + // as tight as possible with register allocation. + while (outputIndex < outputSampleCount) { + // buffer is empty, fetch a new one + while (mBuffer.frameCount == 0) { + mBuffer.frameCount = inFrameCount; + provider->getNextBuffer(&mBuffer, + calculateOutputPTS(outputIndex / 2)); + if (mBuffer.raw == NULL) { + goto resample_exit; + } + if (phaseFraction >= phaseWrapLimit) { // read in data + mInBuffer.readAdvance<CHANNELS>( + impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex); + phaseFraction -= phaseWrapLimit; + while (phaseFraction >= phaseWrapLimit) { + inputIndex++; + if (inputIndex >= mBuffer.frameCount) { + inputIndex -= mBuffer.frameCount; + provider->releaseBuffer(&mBuffer); + break; + } + mInBuffer.readAdvance<CHANNELS>( + impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex); + phaseFraction -= phaseWrapLimit; + } + } + } + const int16_t* const in = mBuffer.i16; + const size_t frameCount = mBuffer.frameCount; + const int coefShift = c.mShift; + const int halfNumCoefs = c.mHalfNumCoefs; + const int32_t* const volumeSimd = mVolumeSimd; + + // reread the last input in. + mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex); + + // main processing loop + while (CC_LIKELY(outputIndex < outputSampleCount)) { + // caution: fir() is inlined and may be large. + // output will be loaded with the appropriate values + // + // from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs] + // from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs. + // + fir<CHANNELS, LOCKED, STRIDE>( + &out[outputIndex], + phaseFraction, phaseWrapLimit, + coefShift, halfNumCoefs, coefs, + impulse, volumeSimd); + outputIndex += 2; + + phaseFraction += phaseIncrement; + while (phaseFraction >= phaseWrapLimit) { + inputIndex++; + if (inputIndex >= frameCount) { + goto done; // need a new buffer + } + mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex); + phaseFraction -= phaseWrapLimit; + } + } +done: + // often arrives here when input buffer runs out + if (inputIndex >= frameCount) { + inputIndex -= frameCount; + provider->releaseBuffer(&mBuffer); + // mBuffer.frameCount MUST be zero here. + } + } + +resample_exit: + mInBuffer.setImpulse(impulse); + mInputIndex = inputIndex; + mPhaseFraction = phaseFraction; +} + +// ---------------------------------------------------------------------------- +}; // namespace android diff --git a/services/audioflinger/AudioResamplerDyn.h b/services/audioflinger/AudioResamplerDyn.h new file mode 100644 index 0000000..df1fdbe --- /dev/null +++ b/services/audioflinger/AudioResamplerDyn.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_DYN_H +#define ANDROID_AUDIO_RESAMPLER_DYN_H + +#include <stdint.h> +#include <sys/types.h> +#include <cutils/log.h> + +#include "AudioResampler.h" + +namespace android { + +class AudioResamplerDyn: public AudioResampler { +public: + AudioResamplerDyn(int bitDepth, int inChannelCount, int32_t sampleRate, + src_quality quality); + + virtual ~AudioResamplerDyn(); + + virtual void init(); + + virtual void setSampleRate(int32_t inSampleRate); + + virtual void setVolume(int16_t left, int16_t right); + + virtual void resample(int32_t* out, size_t outFrameCount, + AudioBufferProvider* provider); + +private: + + class Constants { // stores the filter constants. + public: + Constants() : + mL(0), mShift(0), mHalfNumCoefs(0), mFirCoefsS16(NULL) + {} + void set(int L, int halfNumCoefs, + int inSampleRate, int outSampleRate); + inline void setBuf(int16_t* buf) { + mFirCoefsS16 = buf; + } + inline void setBuf(int32_t* buf) { + mFirCoefsS32 = buf; + } + + int mL; // interpolation phases in the filter. + int mShift; // right shift to get polyphase index + unsigned int mHalfNumCoefs; // filter half #coefs + union { // polyphase filter bank + const int16_t* mFirCoefsS16; + const int32_t* mFirCoefsS32; + }; + }; + + // Input buffer management for a given input type TI, now (int16_t) + // Is agnostic of the actual type, can work with int32_t and float. + template<typename TI> + class InBuffer { + public: + InBuffer(); + ~InBuffer(); + void init(); + void resize(int CHANNELS, int halfNumCoefs); + + // used for direct management of the mImpulse pointer + inline TI* getImpulse() { + return mImpulse; + } + inline void setImpulse(TI *impulse) { + mImpulse = impulse; + } + template<int CHANNELS> + inline void readAgain(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex); + template<int CHANNELS> + inline void readAdvance(TI*& impulse, const int halfNumCoefs, + const TI* const in, const size_t inputIndex); + + private: + // tuning parameter guidelines: 2 <= multiple <= 8 + static const int kStateSizeMultipleOfFilterLength = 4; + + TI* mState; // base pointer for the input buffer storage + TI* mImpulse; // current location of the impulse response (centered) + TI* mRingFull; // mState <= mImpulse < mRingFull + // in general, mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS. + size_t mStateSize; // in units of TI. + }; + + template<int CHANNELS, bool LOCKED, int STRIDE, typename TC> + void resample(int32_t* out, size_t outFrameCount, + const TC* const coefs, AudioBufferProvider* provider); + + template<typename T> + void createKaiserFir(Constants &c, double stopBandAtten, + int inSampleRate, int outSampleRate, double tbwCheat); + + InBuffer<int16_t> mInBuffer; + Constants mConstants; // current set of coefficient parameters + int32_t __attribute__ ((aligned (8))) mVolumeSimd[2]; + int32_t mResampleType; // contains the resample type. + int32_t mFilterSampleRate; // designed filter sample rate. + src_quality mFilterQuality; // designed filter quality. + void* mCoefBuffer; // if a filter is created, this is not null +}; + +// ---------------------------------------------------------------------------- +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_DYN_H*/ diff --git a/services/audioflinger/AudioResamplerFirGen.h b/services/audioflinger/AudioResamplerFirGen.h new file mode 100644 index 0000000..fac3001 --- /dev/null +++ b/services/audioflinger/AudioResamplerFirGen.h @@ -0,0 +1,684 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H +#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H + +namespace android { + +/* + * generates a sine wave at equal steps. + * + * As most of our functions use sine or cosine at equal steps, + * it is very efficient to compute them that way (single multiply and subtract), + * rather than invoking the math library sin() or cos() each time. + * + * SineGen uses Goertzel's Algorithm (as a generator not a filter) + * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep) + * by stepping through 0, 1, ... n. + * + * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep) + * + * or looking at just the imaginary sine term, as the cosine follows identically: + * + * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep) + * + * Goertzel's algorithm is more efficient than the angle addition formula, + * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to + * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and + * cosine generation due to the complex * complex multiply (full rotation). + * + * See: http://en.wikipedia.org/wiki/Goertzel_algorithm + * + */ + +class SineGen { +public: + SineGen(double wstart, double wstep, bool cosine = false) { + if (cosine) { + mCurrent = cos(wstart); + mPrevious = cos(wstart - wstep); + } else { + mCurrent = sin(wstart); + mPrevious = sin(wstart - wstep); + } + mTwoCos = 2.*cos(wstep); + } + SineGen(double expNow, double expPrev, double twoCosStep) { + mCurrent = expNow; + mPrevious = expPrev; + mTwoCos = twoCosStep; + } + inline double value() const { + return mCurrent; + } + inline void advance() { + double tmp = mCurrent; + mCurrent = mCurrent*mTwoCos - mPrevious; + mPrevious = tmp; + } + inline double valueAdvance() { + double tmp = mCurrent; + mCurrent = mCurrent*mTwoCos - mPrevious; + mPrevious = tmp; + return tmp; + } + +private: + double mCurrent; // current value of sine/cosine + double mPrevious; // previous value of sine/cosine + double mTwoCos; // stepping factor +}; + +/* + * generates a series of sine generators, phase offset by fixed steps. + * + * This is used to generate polyphase sine generators, one per polyphase + * in the filter code below. + * + * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep; + * increments by innerStep. + * + */ + +class SineGenGen { +public: + SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false) + : mSineInnerCur(outerStart, outerStep, cosine), + mSineInnerPrev(outerStart-innerStep, outerStep, cosine) + { + mTwoCos = 2.*cos(innerStep); + } + inline SineGen value() { + return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos); + } + inline void advance() { + mSineInnerCur.advance(); + mSineInnerPrev.advance(); + } + inline SineGen valueAdvance() { + return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos); + } + +private: + SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep). + SineGen mSineInnerPrev; // generate the inner sine previous values + // (behind by innerStep, stepped by outerStep). + double mTwoCos; // the inner stepping factor for the returned SineGen. +}; + +static inline double sqr(double x) { + return x * x; +} + +/* + * rounds a double to the nearest integer for FIR coefficients. + * + * One variant uses noise shaping, which must keep error history + * to work (the err parameter, initialized to 0). + * The other variant is a non-noise shaped version for + * S32 coefficients (noise shaping doesn't gain much). + * + * Caution: No bounds saturation is applied, but isn't needed in this case. + * + * @param x is the value to round. + * + * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom). + * Typically this may be the maximum positive integer+1 (using the fact that double precision + * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition). + * + * @param err is the previous error (actual - rounded) for the previous rounding op. + * For 16b coefficients this can improve stopband dB performance by up to 2dB. + * + * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping + * + */ + +static inline int64_t toint(double x, int64_t maxval, double& err) { + double val = x * maxval; + double ival = floor(val + 0.5 + err*0.2); + err = val - ival; + return static_cast<int64_t>(ival); +} + +static inline int64_t toint(double x, int64_t maxval) { + return static_cast<int64_t>(floor(x * maxval + 0.5)); +} + +/* + * Modified Bessel function of the first kind + * http://en.wikipedia.org/wiki/Bessel_function + * + * The formulas are taken from Abramowitz and Stegun, + * _Handbook of Mathematical Functions_ (links below): + * + * http://people.math.sfu.ca/~cbm/aands/page_375.htm + * http://people.math.sfu.ca/~cbm/aands/page_378.htm + * + * http://dlmf.nist.gov/10.25 + * http://dlmf.nist.gov/10.40 + * + * Note we assume x is nonnegative (the function is symmetric, + * pass in the absolute value as needed). + * + * Constants are compile time derived with templates I0Term<> and + * I0ATerm<> to the precision of the compiler. The series can be expanded + * to any precision needed, but currently set around 24b precision. + * + * We use a bit of template math here, constexpr would probably be + * more appropriate for a C++11 compiler. + * + * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit. + * + */ + +template <int N> +struct I0Term { + static const double value = I0Term<N-1>::value / (4. * N * N); +}; + +template <> +struct I0Term<0> { + static const double value = 1.; +}; + +template <int N> +struct I0ATerm { + static const double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N); +}; + +template <> +struct I0ATerm<0> { // 1/sqrt(2*PI); + static const double value = 0.398942280401432677939946059934381868475858631164934657665925; +}; + +#if USE_HORNERS_METHOD +/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... + * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method + * + * This has fewer multiplications than Estrin's method below, but has back to back + * floating point dependencies. + * + * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled. + */ + +inline double Poly2(double A, double B, double x) { + return A + x * B; +} + +inline double Poly4(double A, double B, double C, double D, double x) { + return A + x * (B + x * (C + x * (D))); +} + +inline double Poly7(double A, double B, double C, double D, double E, double F, double G, + double x) { + return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G)))))); +} + +inline double Poly9(double A, double B, double C, double D, double E, double F, double G, + double H, double I, double x) { + return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I)))))))); +} + +#else +/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... + * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme + * + * This is typically faster, perhaps gains about 5-10% overall on ARM processors + * over Horner's method above. + */ + +inline double Poly2(double A, double B, double x) { + return A + B * x; +} + +inline double Poly3(double A, double B, double C, double x, double x2) { + return Poly2(A, B, x) + C * x2; +} + +inline double Poly3(double A, double B, double C, double x) { + return Poly2(A, B, x) + C * x * x; +} + +inline double Poly4(double A, double B, double C, double D, double x, double x2) { + return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2); +} + +inline double Poly4(double A, double B, double C, double D, double x) { + return Poly4(A, B, C, D, x, x * x); +} + +inline double Poly7(double A, double B, double C, double D, double E, double F, double G, + double x) { + double x2 = x * x; + return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2); +} + +inline double Poly8(double A, double B, double C, double D, double E, double F, double G, + double H, double x, double x2, double x4) { + return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4; +} + +inline double Poly9(double A, double B, double C, double D, double E, double F, double G, + double H, double I, double x) { + double x2 = x * x; +#if 1 + // It does not seem faster to explicitly decompose Poly8 into Poly4, but + // could depend on compiler floating point scheduling. + double x4 = x2 * x2; + return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4); +#else + double val = Poly4(A, B, C, D, x, x2); + double x4 = x2 * x2; + return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4); +#endif +} +#endif + +static inline double I0(double x) { + if (x < 3.75) { + x *= x; + return Poly7(I0Term<0>::value, I0Term<1>::value, + I0Term<2>::value, I0Term<3>::value, + I0Term<4>::value, I0Term<5>::value, + I0Term<6>::value, x); // e < 1.6e-7 + } + if (1) { + /* + * Series expansion coefs are easy to calculate, but are expanded around 0, + * so error is unequal over the interval 0 < x < 3.75, the error being + * significantly better near 0. + * + * A better solution is to use precise minimax polynomial fits. + * + * We use a slightly more complicated solution for 3.75 < x < 15, based on + * the tables in Blair and Edwards, "Stable Rational Minimax Approximations + * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory, + * AECL-4928. + * + * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf + * + * See Table 11 for 0 < x < 15; e < 10^(-7.13). + * + * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b). + * + * This speeds up overall computation by about 40% over using the else clause below, + * which requires sqrt and exp. + * + */ + + x *= x; + double num = Poly9(-0.13544938430e9, -0.33153754512e8, + -0.19406631946e7, -0.48058318783e5, + -0.63269783360e3, -0.49520779070e1, + -0.24970910370e-1, -0.74741159550e-4, + -0.18257612460e-6, x); + double y = x - 225.; // reflection around 15 (squared) + double den = Poly4(-0.34598737196e8, 0.23852643181e6, + -0.70699387620e3, 0.10000000000e1, y); + return num / den; + +#if IO_EXTENDED_BETA + /* Table 42 for x > 15; e < 10^(-8.11). + * This is used for Beta>15, but is disabled here as + * we never use Beta that high. + * + * NOTE: This should be enabled only for x > 15. + */ + + double y = 1./x; + double z = y - (1./15); + double num = Poly2(0.415079861746e1, -0.5149092496e1, z); + double den = Poly3(0.103150763823e2, -0.14181687413e2, + 0.1000000000e1, z); + return exp(x) * sqrt(y) * num / den; +#endif + } else { + /* + * NOT USED, but reference for large Beta. + * + * Abramowitz and Stegun asymptotic formula. + * works for x > 3.75. + */ + double y = 1./x; + return exp(x) * sqrt(y) * + // note: reciprocal squareroot may be easier! + // http://en.wikipedia.org/wiki/Fast_inverse_square_root + Poly9(I0ATerm<0>::value, I0ATerm<1>::value, + I0ATerm<2>::value, I0ATerm<3>::value, + I0ATerm<4>::value, I0ATerm<5>::value, + I0ATerm<6>::value, I0ATerm<7>::value, + I0ATerm<8>::value, y); // (... e) < 1.9e-7 + } +} + +/* + * calculates the transition bandwidth for a Kaiser filter + * + * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 + * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 + * + * @param halfNumCoef is half the number of coefficients per filter phase. + * + * @param stopBandAtten is the stop band attenuation desired. + * + * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5) + */ +static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) { + return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef); +} + +/* + * calculates the fir transfer response of the overall polyphase filter at w. + * + * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the + * fact that h[n] is symmetric (cosines only, no complex arithmetic). + * + * We use Goertzel's algorithm to accelerate the computation to essentially + * a single multiply and 2 adds per filter coefficient h[]. + * + * Be careful be careful to consider that h[n] is the overall polyphase filter, + * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain", + * as you only use one of the polyphases at a time. + */ +template <typename T> +static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) { + double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank + coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank). +#if SLOW_FIRTRANSFER + /* Original code for reference. This is equivalent to the code below, but slower. */ + for (int i=1 ; i<=L ; ++i) { + for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { + accum += cos(ix*w)*static_cast<double>(*coef++); + } + } +#else + /* + * Our overall filter is stored striped by polyphases, not a contiguous h[n]. + * We could fetch coefficients in a non-contiguous fashion + * but that will not scale to vector processing. + * + * We apply Goertzel's algorithm directly to each polyphase filter bank instead of + * using cosine generation/multiplication, thereby saving one multiply per inner loop. + * + * See: http://en.wikipedia.org/wiki/Goertzel_algorithm + * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720. + * + * We use the basic recursion to incorporate the cosine steps into real sequence x[n]: + * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2] + * + * y[n] = s[n] - e^(iw)s[n-1] + * = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k)) + * = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk) + * + * The summation contains the frequency steps we want multiplied by the source + * (similar to a DTFT). + * + * Using symmetry, and just the real part (be careful, this must happen + * after any internal complex multiplications), the polyphase filterbank + * transfer function is: + * + * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0) + * = Re{ e^(iwn + iw_0) y[n]} + * = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1] + * + * using the fact that s[n] of real x[n] is real. + * + */ + double dcos = 2. * cos(L*w); + int start = ((halfNumCoef)*L + 1); + SineGen cc((start - L) * w, w, true); // cosine + SineGen cp(start * w, w, true); // cosine + for (int i=1 ; i<=L ; ++i) { + double sc = 0; + double sp = 0; + for (int j=0 ; j<halfNumCoef ; ++j) { + double tmp = sc; + sc = static_cast<double>(*coef++) + dcos*sc - sp; + sp = tmp; + } + // If we are awfully clever, we can apply Goertzel's algorithm + // again on the sc and sp sequences returned here. + accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp; + } +#endif + return accum*2.; +} + +/* + * evaluates the minimum and maximum |H(f)| bound in a band region. + * + * This is usually done with equally spaced increments in the target band in question. + * The passband is often very small, and sampled that way. The stopband is often much + * larger. + * + * We use the fact that the overall polyphase filter has an additional bank at the end + * for interpolation; hence it is overspecified for the H(f) computation. Thus the + * first polyphase is never actually checked, excepting its first term. + * + * In this code we use the firTransfer() evaluator above, which uses Goertzel's + * algorithm to calculate the transfer function at each point. + * + * TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal + * spacing is a chirp transform. + * + * @param coef is the designed polyphase filter banks + * + * @param L is the number of phases (for interpolation) + * + * @param halfNumCoef should be half the number of coefficients for a single + * polyphase. + * + * @param fstart is the normalized frequency start. + * + * @param fend is the normalized frequency end. + * + * @param steps is the number of steps to take (sampling) between frequency start and end + * + * @param firMin returns the minimum transfer |H(f)| found + * + * @param firMax returns the maximum transfer |H(f)| found + * + * 0 <= f <= 0.5. + * This is used to test passband and stopband performance. + */ +template <typename T> +static void testFir(const T* coef, int L, int halfNumCoef, + double fstart, double fend, int steps, double &firMin, double &firMax) { + double wstart = fstart*(2.*M_PI); + double wend = fend*(2.*M_PI); + double wstep = (wend - wstart)/steps; + double fmax, fmin; + double trf = firTransfer(coef, L, halfNumCoef, wstart); + if (trf<0) { + trf = -trf; + } + fmin = fmax = trf; + wstart += wstep; + for (int i=1; i<steps; ++i) { + trf = firTransfer(coef, L, halfNumCoef, wstart); + if (trf<0) { + trf = -trf; + } + if (trf>fmax) { + fmax = trf; + } + else if (trf<fmin) { + fmin = trf; + } + wstart += wstep; + } + // renormalize - this is only needed for integer filter types + double norm = 1./((1ULL<<(sizeof(T)*8-1))*L); + + firMin = fmin * norm; + firMax = fmax * norm; +} + +/* + * evaluates the |H(f)| lowpass band characteristics. + * + * This function tests the lowpass characteristics for the overall polyphase filter, + * and is used to verify the design. For this case, fp should be set to the + * passband normalized frequency from 0 to 0.5 for the overall filter (thus it + * is the designed polyphase bank value / L). Likewise for fs. + * + * @param coef is the designed polyphase filter banks + * + * @param L is the number of phases (for interpolation) + * + * @param halfNumCoef should be half the number of coefficients for a single + * polyphase. + * + * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5. + * + * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5. + * + * @param passSteps is the number of passband sampling steps. + * + * @param stopSteps is the number of stopband sampling steps. + * + * @param passMin is the minimum value in the passband + * + * @param passMax is the maximum value in the passband (useful for scaling). This should + * be less than 1., to avoid sine wave test overflow. + * + * @param passRipple is the passband ripple. Typically this should be less than 0.1 for + * an audio filter. Generally speaker/headphone device characteristics will dominate + * the passband term. + * + * @param stopMax is the maximum value in the stopband. + * + * @param stopRipple is the stopband ripple, also known as stopband attenuation. + * Typically this should be greater than ~80dB for low quality, and greater than + * ~100dB for full 16b quality, otherwise aliasing may become noticeable. + * + */ +template <typename T> +static void testFir(const T* coef, int L, int halfNumCoef, + double fp, double fs, int passSteps, int stopSteps, + double &passMin, double &passMax, double &passRipple, + double &stopMax, double &stopRipple) { + double fmin, fmax; + testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax); + double d1 = (fmax - fmin)/2.; + passMin = fmin; + passMax = fmax; + passRipple = -20.*log10(1. - d1); // passband ripple + testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax); + // fmin is really not important for the stopband. + stopMax = fmax; + stopRipple = -20.*log10(fmax); // stopband ripple/attenuation +} + +/* + * Calculates the overall polyphase filter based on a windowed sinc function. + * + * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1 + * taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks. + * The last filterbank is used for interpolation purposes (and is mostly composed + * of the first bank shifted by one sample), and is unnecessary if one does + * not do interpolation. + * + * We use the last filterbank for some transfer function calculation purposes, + * so it needs to be generated anyways. + * + * @param coef is the caller allocated space for coefficients. This should be + * exactly (L+1)*halfNumCoef in size. + * + * @param L is the number of phases (for interpolation) + * + * @param halfNumCoef should be half the number of coefficients for a single + * polyphase. + * + * @param stopBandAtten is the stopband value, should be >50dB. + * + * @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy + * should be 6dB less. (fcr is where the amplitude drops by half). Use the + * firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint + * between the stop band and the pass band (fstop+fpass)/2. + * + * @param atten is the attenuation (generally slightly less than 1). + */ + +template <typename T> +static inline void firKaiserGen(T* coef, int L, int halfNumCoef, + double stopBandAtten, double fcr, double atten) { + // + // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 + // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 + // + // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf + // + // Kaiser window and beta parameter + // + // | 0.1102*(A - 8.7) A > 50 + // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50 + // | 0. A < 21 + // + // with A is the desired stop-band attenuation in dBFS + // + // 30 dB 2.210 + // 40 dB 3.384 + // 50 dB 4.538 + // 60 dB 5.658 + // 70 dB 6.764 + // 80 dB 7.865 + // 90 dB 8.960 + // 100 dB 10.056 + + const int N = L * halfNumCoef; // non-negative half + const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always + const double xstep = (2. * M_PI) * fcr / L; + const double xfrac = 1. / N; + const double yscale = atten * L / (I0(beta) * M_PI); + + // We use sine generators, which computes sines on regular step intervals. + // This speeds up overall computation about 40% from computing the sine directly. + + SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase) + + for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation + + // computation for a single polyphase of the overall filter. + SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop. + double err = 0; // for noise shaping on int16_t coefficients (over each polyphase) + + for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { + double y; + if (CC_LIKELY(ix)) { + double x = static_cast<double>(ix); + + // sine generator: sg.valueAdvance() returns sin(ix*xstep); + y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x; + } else { + y = 2. * atten * fcr; // center of filter, sinc(0) = 1. + sg.advance(); + } + + // (caution!) float version does not need rounding + if (is_same<T, int16_t>::value) { // int16_t needs noise shaping + *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err)); + } else { + *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1))); + } + } + } +} + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/ diff --git a/services/audioflinger/AudioResamplerFirOps.h b/services/audioflinger/AudioResamplerFirOps.h new file mode 100644 index 0000000..bf2163f --- /dev/null +++ b/services/audioflinger/AudioResamplerFirOps.h @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_OPS_H +#define ANDROID_AUDIO_RESAMPLER_FIR_OPS_H + +namespace android { + +#if defined(__arm__) && !defined(__thumb__) +#define USE_INLINE_ASSEMBLY (true) +#else +#define USE_INLINE_ASSEMBLY (false) +#endif + +#if USE_INLINE_ASSEMBLY && defined(__ARM_NEON__) +#define USE_NEON (true) +#include <arm_neon.h> +#else +#define USE_NEON (false) +#endif + +template<typename T, typename U> +struct is_same +{ + static const bool value = false; +}; + +template<typename T> +struct is_same<T, T> // partial specialization +{ + static const bool value = true; +}; + +static inline +int32_t mulRL(int left, int32_t in, uint32_t vRL) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + if (left) { + asm( "smultb %[out], %[in], %[vRL] \n" + : [out]"=r"(out) + : [in]"%r"(in), [vRL]"r"(vRL) + : ); + } else { + asm( "smultt %[out], %[in], %[vRL] \n" + : [out]"=r"(out) + : [in]"%r"(in), [vRL]"r"(vRL) + : ); + } + return out; +#else + int16_t v = left ? static_cast<int16_t>(vRL) : static_cast<int16_t>(vRL>>16); + return static_cast<int32_t>((static_cast<int64_t>(in) * v) >> 16); +#endif +} + +static inline +int32_t mulAdd(int16_t in, int16_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + asm( "smlabb %[out], %[v], %[in], %[a] \n" + : [out]"=r"(out) + : [in]"%r"(in), [v]"r"(v), [a]"r"(a) + : ); + return out; +#else + return a + v * in; +#endif +} + +static inline +int32_t mulAdd(int16_t in, int32_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + asm( "smlawb %[out], %[v], %[in], %[a] \n" + : [out]"=r"(out) + : [in]"%r"(in), [v]"r"(v), [a]"r"(a) + : ); + return out; +#else + return a + static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 16); +#endif +} + +static inline +int32_t mulAdd(int32_t in, int32_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + asm( "smmla %[out], %[v], %[in], %[a] \n" + : [out]"=r"(out) + : [in]"%r"(in), [v]"r"(v), [a]"r"(a) + : ); + return out; +#else + return a + static_cast<int32_t>((static_cast<int64_t>(v) * in) >> 32); +#endif +} + +static inline +int32_t mulAddRL(int left, uint32_t inRL, int16_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + if (left) { + asm( "smlabb %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } else { + asm( "smlabt %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } + return out; +#else + int16_t s = left ? static_cast<int16_t>(inRL) : static_cast<int16_t>(inRL>>16); + return a + v * s; +#endif +} + +static inline +int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a) +{ +#if USE_INLINE_ASSEMBLY + int32_t out; + if (left) { + asm( "smlawb %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } else { + asm( "smlawt %[out], %[v], %[inRL], %[a] \n" + : [out]"=r"(out) + : [inRL]"%r"(inRL), [v]"r"(v), [a]"r"(a) + : ); + } + return out; +#else + int16_t s = left ? static_cast<int16_t>(inRL) : static_cast<int16_t>(inRL>>16); + return a + static_cast<int32_t>((static_cast<int64_t>(v) * s) >> 16); +#endif +} + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_OPS_H*/ diff --git a/services/audioflinger/AudioResamplerFirProcess.h b/services/audioflinger/AudioResamplerFirProcess.h new file mode 100644 index 0000000..38e387c --- /dev/null +++ b/services/audioflinger/AudioResamplerFirProcess.h @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H +#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H + +namespace android { + +// depends on AudioResamplerFirOps.h + +template<int CHANNELS, typename TC> +static inline +void mac( + int32_t& l, int32_t& r, + const TC coef, + const int16_t* samples) +{ + if (CHANNELS == 2) { + uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); + l = mulAddRL(1, rl, coef, l); + r = mulAddRL(0, rl, coef, r); + } else { + r = l = mulAdd(samples[0], coef, l); + } +} + +template<int CHANNELS, typename TC> +static inline +void interpolate( + int32_t& l, int32_t& r, + const TC coef_0, const TC coef_1, + const int16_t lerp, const int16_t* samples) +{ + TC sinc; + + if (is_same<TC, int16_t>::value) { + sinc = (lerp * ((coef_1-coef_0)<<1)>>16) + coef_0; + } else { + sinc = mulAdd(lerp, (coef_1-coef_0)<<1, coef_0); + } + if (CHANNELS == 2) { + uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); + l = mulAddRL(1, rl, sinc, l); + r = mulAddRL(0, rl, sinc, r); + } else { + r = l = mulAdd(samples[0], sinc, l); + } +} + +/* + * Calculates a single output sample (two stereo frames). + * + * This function computes both the positive half FIR dot product and + * the negative half FIR dot product, accumulates, and then applies the volume. + * + * This is a locked phase filter (it does not compute the interpolation). + * + * Use fir() to compute the proper coefficient pointers for a polyphase + * filter bank. + */ + +template <int CHANNELS, int STRIDE, typename TC> +static inline +void ProcessL(int32_t* const out, + int count, + const TC* coefsP, + const TC* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + int32_t l = 0; + int32_t r = 0; + do { + mac<CHANNELS>(l, r, *coefsP++, sP); + sP -= CHANNELS; + mac<CHANNELS>(l, r, *coefsN++, sN); + sN += CHANNELS; + } while (--count > 0); + out[0] += 2 * mulRL(0, l, volumeLR[0]); // Note: only use top 16b + out[1] += 2 * mulRL(0, r, volumeLR[1]); // Note: only use top 16b +} + +/* + * Calculates a single output sample (two stereo frames) interpolating phase. + * + * This function computes both the positive half FIR dot product and + * the negative half FIR dot product, accumulates, and then applies the volume. + * + * This is an interpolated phase filter. + * + * Use fir() to compute the proper coefficient pointers for a polyphase + * filter bank. + */ + +template <int CHANNELS, int STRIDE, typename TC> +static inline +void Process(int32_t* const out, + int count, + const TC* coefsP, + const TC* coefsN, + const TC* coefsP1, + const TC* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + (void) coefsP1; // suppress unused parameter warning + (void) coefsN1; + if (sizeof(*coefsP)==4) { + lerpP >>= 16; // ensure lerpP is 16b + } + int32_t l = 0; + int32_t r = 0; + for (size_t i = 0; i < count; ++i) { + interpolate<CHANNELS>(l, r, coefsP[0], coefsP[count], lerpP, sP); + coefsP++; + sP -= CHANNELS; + interpolate<CHANNELS>(l, r, coefsN[count], coefsN[0], lerpP, sN); + coefsN++; + sN += CHANNELS; + } + out[0] += 2 * mulRL(0, l, volumeLR[0]); // Note: only use top 16b + out[1] += 2 * mulRL(0, r, volumeLR[1]); // Note: only use top 16b +} + +/* + * Calculates a single output sample (two stereo frames) from input sample pointer. + * + * This sets up the params for the accelerated Process() and ProcessL() + * functions to do the appropriate dot products. + * + * @param out should point to the output buffer with at least enough space for 2 output frames. + * + * @param phase is the fractional distance between input samples for interpolation: + * phase >= 0 && phase < phaseWrapLimit. It can be thought of as a rational fraction + * of phase/phaseWrapLimit. + * + * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases + * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift). + * + * @param coefShift gives the bit alignment of the polyphase index in the phase parameter. + * + * @param halfNumCoefs is the half the number of coefficients per polyphase filter. Since the + * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored. + * + * @param coefs is the polyphase filter bank, starting at from polyphase index 0, and ranging to + * and including the #polyphases. Each polyphase of the filter has half-length halfNumCoefs + * (due to symmetry). The total size of the filter bank in coefficients is + * (#polyphases+1)*halfNumCoefs. + * + * The filter bank coefs should be aligned to a minimum of 16 bytes (preferrably to cache line). + * + * The coefs should be attenuated (to compensate for passband ripple) + * if storing back into the native format. + * + * @param samples are unaligned input samples. The position is in the "middle" of the + * sample array with respect to the FIR filter: + * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs; + * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1. + * + * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel, + * expressed as a S32 integer. A negative value inverts the channel 180 degrees. + * The pointer volumeLR should be aligned to a minimum of 8 bytes. + * A typical value for volume is 0x1000 to align to a unity gain output of 20.12. + * + * In between calls to filterCoefficient, the phase is incremented by phaseIncrement, where + * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling. + * + * The filter polyphase index is given by indexP = phase >> coefShift. Due to + * odd length symmetric filter, the polyphase index of the negative half depends on + * whether interpolation is used. + * + * The fractional siting between the polyphase indices is given by the bits below coefShift: + * + * lerpP = phase << 32 - coefShift >> 1; // for 32 bit unsigned phase multiply + * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply + * + * For integer types, this is expressed as: + * + * lerpP = phase << sizeof(phase)*8 - coefShift + * >> (sizeof(phase)-sizeof(*coefs))*8 + 1; + * + */ + +template<int CHANNELS, bool LOCKED, int STRIDE, typename TC> +static inline +void fir(int32_t* const out, + const uint32_t phase, const uint32_t phaseWrapLimit, + const int coefShift, const int halfNumCoefs, const TC* const coefs, + const int16_t* const samples, const int32_t* const volumeLR) +{ + // NOTE: be very careful when modifying the code here. register + // pressure is very high and a small change might cause the compiler + // to generate far less efficient code. + // Always sanity check the result with objdump or test-resample. + + if (LOCKED) { + // locked polyphase (no interpolation) + // Compute the polyphase filter index on the positive and negative side. + uint32_t indexP = phase >> coefShift; + uint32_t indexN = (phaseWrapLimit - phase) >> coefShift; + const TC* coefsP = coefs + indexP*halfNumCoefs; + const TC* coefsN = coefs + indexN*halfNumCoefs; + const int16_t* sP = samples; + const int16_t* sN = samples + CHANNELS; + + // dot product filter. + ProcessL<CHANNELS, STRIDE>(out, + halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR); + } else { + // interpolated polyphase + // Compute the polyphase filter index on the positive and negative side. + uint32_t indexP = phase >> coefShift; + uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement. + const TC* coefsP = coefs + indexP*halfNumCoefs; + const TC* coefsN = coefs + indexN*halfNumCoefs; + const TC* coefsP1 = coefsP + halfNumCoefs; + const TC* coefsN1 = coefsN + halfNumCoefs; + const int16_t* sP = samples; + const int16_t* sN = samples + CHANNELS; + + // Interpolation fraction lerpP derived by shifting all the way up and down + // to clear the appropriate bits and align to the appropriate level + // for the integer multiply. The constants should resolve in compile time. + // + // The interpolated filter coefficient is derived as follows for the pos/neg half: + // + // interpolated[P] = index[P]*lerpP + index[P+1]*(1-lerpP) + // interpolated[N] = index[N+1]*lerpP + index[N]*(1-lerpP) + uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift) + >> ((sizeof(phase)-sizeof(*coefs))*8 + 1); + + // on-the-fly interpolated dot product filter + Process<CHANNELS, STRIDE>(out, + halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR); + } +} + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/ diff --git a/services/audioflinger/AudioResamplerFirProcessNeon.h b/services/audioflinger/AudioResamplerFirProcessNeon.h new file mode 100644 index 0000000..f311cef --- /dev/null +++ b/services/audioflinger/AudioResamplerFirProcessNeon.h @@ -0,0 +1,1149 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H +#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H + +namespace android { + +// depends on AudioResamplerFirOps.h, AudioResamplerFirProcess.h + +#if USE_NEON +// +// NEON specializations are enabled for Process() and ProcessL() +// +// TODO: Stride 16 and Stride 8 can be combined with one pass stride 8 (if necessary) +// and looping stride 16 (or vice versa). This has some polyphase coef data alignment +// issues with S16 coefs. Consider this later. + +// Macros to save a mono/stereo accumulator sample in q0 (and q4) as stereo out. +#define ASSEMBLY_ACCUMULATE_MONO \ + "vld1.s32 {d2}, [%[vLR]:64] \n"/* (1) load volumes */\ + "vld1.s32 {d3}, %[out] \n"/* (2) unaligned load the output */\ + "vpadd.s32 d0, d0, d1 \n"/* (1) add all 4 partial sums */\ + "vpadd.s32 d0, d0, d0 \n"/* (1+4d) and replicate L/R */\ + "vqrdmulh.s32 d0, d0, d2 \n"/* (2+3d) apply volume */\ + "vqadd.s32 d3, d3, d0 \n"/* (1+4d) accumulate result (saturating) */\ + "vst1.s32 {d3}, %[out] \n"/* (2+2d) store result */ + +#define ASSEMBLY_ACCUMULATE_STEREO \ + "vld1.s32 {d2}, [%[vLR]:64] \n"/* (1) load volumes*/\ + "vld1.s32 {d3}, %[out] \n"/* (2) unaligned load the output*/\ + "vpadd.s32 d0, d0, d1 \n"/* (1) add all 4 partial sums from q0*/\ + "vpadd.s32 d8, d8, d9 \n"/* (1) add all 4 partial sums from q4*/\ + "vpadd.s32 d0, d0, d8 \n"/* (1+4d) combine into L/R*/\ + "vqrdmulh.s32 d0, d0, d2 \n"/* (2+3d) apply volume*/\ + "vqadd.s32 d3, d3, d0 \n"/* (1+4d) accumulate result (saturating)*/\ + "vst1.s32 {d3}, %[out] \n"/* (2+2d)store result*/ + +template <> +inline void ProcessL<1, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// (2+0d) load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// (2) load 8 16-bits mono samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs + + "vrev64.16 q2, q2 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4 + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d17 \n"// (1+0d) multiply (reversed)samples by coef + "vmlal.s16 q0, d5, d16 \n"// (1) multiply (reversed)samples by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + "vmlal.s16 q0, d7, d21 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q10" + ); +} + +template <> +inline void ProcessL<2, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q10}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + "vrev64.16 q3, q3 \n"// (0 combines+) reverse right positive + + "vmlal.s16 q0, d4, d17 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q0, d5, d16 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q4, d6, d17 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q4, d7, d16 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q0, d10, d20 \n"// (1) multiply samples left + "vmlal.s16 q0, d11, d21 \n"// (1) multiply samples left + "vmlal.s16 q4, d12, d20 \n"// (1) multiply samples right + "vmlal.s16 q4, d13, d21 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #32 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q10" + ); +} + +template <> +inline void Process<1, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase S32 Q15 + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// (2+0d) load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// (2) load 8 16-bits mono samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q9}, [%[coefsP1]:128]! \n"// (1) load 8 16-bits coefs for interpolation + "vld1.16 {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q11}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs for interpolation + + "vsub.s16 q9, q9, q8 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 q11, q11, q10 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 q9, q9, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 q11, q11, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 q2, q2 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4 + + "vadd.s16 q8, q8, q9 \n"// (1+2d) interpolate (step3) 1st set + "vadd.s16 q10, q10, q11 \n"// (1+1d) interpolate (step3) 2nd set + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d17 \n"// (1+0d) multiply reversed samples by coef + "vmlal.s16 q0, d5, d16 \n"// (1) multiply reversed samples by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + "vmlal.s16 q0, d7, d21 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void Process<2, 16>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples + "vld1.16 {q8}, [%[coefsP0]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q9}, [%[coefsP1]:128]! \n"// (1) load 8 16-bits coefs for interpolation + "vld1.16 {q10}, [%[coefsN1]:128]! \n"// (1) load 8 16-bits coefs + "vld1.16 {q11}, [%[coefsN0]:128]! \n"// (1) load 8 16-bits coefs for interpolation + + "vsub.s16 q9, q9, q8 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 q11, q11, q10 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 q9, q9, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 q11, q11, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + "vrev64.16 q3, q3 \n"// (1) reverse 8 frames of the right positive + + "vadd.s16 q8, q8, q9 \n"// (1+1d) interpolate (step3) 1st set + "vadd.s16 q10, q10, q11 \n"// (1+1d) interpolate (step3) 2nd set + + "vmlal.s16 q0, d4, d17 \n"// (1) multiply reversed samples left + "vmlal.s16 q0, d5, d16 \n"// (1) multiply reversed samples left + "vmlal.s16 q4, d6, d17 \n"// (1) multiply reversed samples right + "vmlal.s16 q4, d7, d16 \n"// (1) multiply reversed samples right + "vmlal.s16 q0, d10, d20 \n"// (1) multiply samples left + "vmlal.s16 q0, d11, d21 \n"// (1) multiply samples left + "vmlal.s16 q4, d12, d20 \n"// (1) multiply samples right + "vmlal.s16 q4, d13, d21 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #8 \n"// (1) update loop counter + "sub %[sP], %[sP], #32 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void ProcessL<1, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// load 8 16-bits mono samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// accumulate result + "vadd.s32 q0, q0, q13 \n"// accumulate result + + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + "subs %[count], %[count], #8 \n"// update loop counter + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void ProcessL<2, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + "vrev64.16 q3, q3 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d10, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d11, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// (+1) accumulate result + "vadd.s32 q0, q0, q13 \n"// (+1) accumulate result + + "vshll.s16 q12, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d7, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d12, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d13, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q4, q4, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// (+1) accumulate result + "vadd.s32 q4, q4, q13 \n"// (+1) accumulate result + + "subs %[count], %[count], #8 \n"// update loop counter + "sub %[sP], %[sP], #32 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void Process<1, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {q2}, [%[sP]] \n"// load 8 16-bits mono samples + "vld1.16 {q3}, [%[sN]]! \n"// load 8 16-bits mono samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q14, q15}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs + + "vsub.s32 q12, q12, q8 \n"// interpolate (step1) + "vsub.s32 q13, q13, q9 \n"// interpolate (step1) + "vsub.s32 q14, q14, q10 \n"// interpolate (step1) + "vsub.s32 q15, q15, q11 \n"// interpolate (step1) + + "vqrdmulh.s32 q12, q12, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q13, q13, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q14, q14, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q15, q15, d2[0] \n"// interpolate (step2) + + "vadd.s32 q8, q8, q12 \n"// interpolate (step3) + "vadd.s32 q9, q9, q13 \n"// interpolate (step3) + "vadd.s32 q10, q10, q14 \n"// interpolate (step3) + "vadd.s32 q11, q11, q15 \n"// interpolate (step3) + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// accumulate result + "vadd.s32 q0, q0, q13 \n"// accumulate result + + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + "subs %[count], %[count], #8 \n"// update loop counter + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void Process<2, 16>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 16; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + + "vld2.16 {q2, q3}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {q5, q6}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8, q9}, [%[coefsP0]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q12, q13}, [%[coefsP1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q10, q11}, [%[coefsN1]:128]! \n"// load 8 32-bits coefs + "vld1.32 {q14, q15}, [%[coefsN0]:128]! \n"// load 8 32-bits coefs + + "vsub.s32 q12, q12, q8 \n"// interpolate (step1) + "vsub.s32 q13, q13, q9 \n"// interpolate (step1) + "vsub.s32 q14, q14, q10 \n"// interpolate (step1) + "vsub.s32 q15, q15, q11 \n"// interpolate (step1) + + "vqrdmulh.s32 q12, q12, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q13, q13, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q14, q14, d2[0] \n"// interpolate (step2) + "vqrdmulh.s32 q15, q15, d2[0] \n"// interpolate (step2) + + "vadd.s32 q8, q8, q12 \n"// interpolate (step3) + "vadd.s32 q9, q9, q13 \n"// interpolate (step3) + "vadd.s32 q10, q10, q14 \n"// interpolate (step3) + "vadd.s32 q11, q11, q15 \n"// interpolate (step3) + + "vrev64.16 q2, q2 \n"// reverse 8 frames of the positive side + "vrev64.16 q3, q3 \n"// reverse 8 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d10, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d11, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q0, q0, q15 \n"// (+1) accumulate result + "vadd.s32 q0, q0, q13 \n"// (+1) accumulate result + + "vshll.s16 q12, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d7, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d12, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d13, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q9 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q11 \n"// multiply samples by interpolated coef + + "vadd.s32 q4, q4, q12 \n"// accumulate result + "vadd.s32 q13, q13, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// (+1) accumulate result + "vadd.s32 q4, q4, q13 \n"// (+1) accumulate result + + "subs %[count], %[count], #8 \n"// update loop counter + "sub %[sP], %[sP], #32 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void ProcessL<1, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// (2+0d) load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// (2) load 4 16-bits mono samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 4 16-bits coefs + "vld1.16 {d20}, [%[coefsN0]:64]! \n"// (1) load 4 16-bits coefs + + "vrev64.16 d4, d4 \n"// (1) reversed s3, s2, s1, s0, s7, s6, s5, s4 + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed)samples by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #8 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q10" + ); +} + +template <> +inline void ProcessL<2, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {d4, d5}, [%[sP]] \n"// (2+0d) load 8 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// (2) load 8 16-bits stereo samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 8 16-bits coefs + "vld1.16 {d20}, [%[coefsN0]:64]! \n"// (1) load 8 16-bits coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + + "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q4, d5, d16 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q0, d6, d20 \n"// (1) multiply samples left + "vmlal.s16 q4, d7, d20 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// (0) move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q10" + ); +} + +template <> +inline void Process<1, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase S32 Q15 + "veor q0, q0, q0 \n"// (0 - combines+) accumulator = 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// (2+0d) load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// (2) load 4 16-bits mono samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 4 16-bits coefs + "vld1.16 {d17}, [%[coefsP1]:64]! \n"// (1) load 4 16-bits coefs for interpolation + "vld1.16 {d20}, [%[coefsN1]:64]! \n"// (1) load 4 16-bits coefs + "vld1.16 {d21}, [%[coefsN0]:64]! \n"// (1) load 4 16-bits coefs for interpolation + + "vsub.s16 d17, d17, d16 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 d21, d21, d20 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 d17, d17, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 d21, d21, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 d4, d4 \n"// (1) reverse s3, s2, s1, s0, s7, s6, s5, s4 + + "vadd.s16 d16, d16, d17 \n"// (1+2d) interpolate (step3) 1st set + "vadd.s16 d20, d20, d21 \n"// (1+1d) interpolate (step3) 2nd set + + // reordering the vmal to do d6, d7 before d4, d5 is slower(?) + "vmlal.s16 q0, d4, d16 \n"// (1+0d) multiply (reversed)by coef + "vmlal.s16 q0, d6, d20 \n"// (1) multiply neg samples + + // moving these ARM instructions before neon above seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void Process<2, 8>(int32_t* const out, + int count, + const int16_t* coefsP, + const int16_t* coefsN, + const int16_t* coefsP1, + const int16_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// (1) acc_L = 0 + "veor q4, q4, q4 \n"// (0 combines+) acc_R = 0 + + "1: \n" + + "vld2.16 {d4, d5}, [%[sP]] \n"// (3+0d) load 8 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// (3) load 8 16-bits stereo samples + "vld1.16 {d16}, [%[coefsP0]:64]! \n"// (1) load 8 16-bits coefs + "vld1.16 {d17}, [%[coefsP1]:64]! \n"// (1) load 8 16-bits coefs for interpolation + "vld1.16 {d20}, [%[coefsN1]:64]! \n"// (1) load 8 16-bits coefs + "vld1.16 {d21}, [%[coefsN0]:64]! \n"// (1) load 8 16-bits coefs for interpolation + + "vsub.s16 d17, d17, d16 \n"// (1) interpolate (step1) 1st set of coefs + "vsub.s16 d21, d21, d20 \n"// (1) interpolate (step1) 2nd set of coets + + "vqrdmulh.s16 d17, d17, d2[0] \n"// (2) interpolate (step2) 1st set of coefs + "vqrdmulh.s16 d21, d21, d2[0] \n"// (2) interpolate (step2) 2nd set of coefs + + "vrev64.16 q2, q2 \n"// (1) reverse 8 frames of the left positive + + "vadd.s16 d16, d16, d17 \n"// (1+1d) interpolate (step3) 1st set + "vadd.s16 d20, d20, d21 \n"// (1+1d) interpolate (step3) 2nd set + + "vmlal.s16 q0, d4, d16 \n"// (1) multiply (reversed) samples left + "vmlal.s16 q4, d5, d16 \n"// (1) multiply (reversed) samples right + "vmlal.s16 q0, d6, d20 \n"// (1) multiply samples left + "vmlal.s16 q4, d7, d20 \n"// (1) multiply samples right + + // moving these ARM before neon seems to be slower + "subs %[count], %[count], #4 \n"// (1) update loop counter + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + + // sP used after branch (warning) + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [coefsP1] "+r" (coefsP1), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q4", "q5", "q6", + "q8", "q9", "q10", "q11" + ); +} + +template <> +inline void ProcessL<1, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// load 4 16-bits mono samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q10}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs + + "vrev64.16 d4, d4 \n"// reverse 2 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// (stall) extend samples to 31 bits + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// (stall) accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q14" + ); +} + +template <> +inline void ProcessL<2, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int16_t* sP, + const int16_t* sN, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + + "vld2.16 {d4, d5}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q10}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs + + "vrev64.16 q2, q2 \n"// reverse 2 frames of the positive side + + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by coef + "vqrdmulh.s32 q15, q15, q10 \n"// multiply samples by coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q4, q4, q13 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsN0] "+r" (coefsN), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", "q4", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +template <> +inline void Process<1, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 1; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + + "1: \n" + + "vld1.16 {d4}, [%[sP]] \n"// load 4 16-bits mono samples + "vld1.16 {d6}, [%[sN]]! \n"// load 4 16-bits mono samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q9}, [%[coefsP1]:128]! \n"// load 4 32-bits coefs for interpolation + "vld1.32 {q10}, [%[coefsN1]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs for interpolation + + "vrev64.16 d4, d4 \n"// reverse 2 frames of the positive side + + "vsub.s32 q9, q9, q8 \n"// interpolate (step1) 1st set of coefs + "vsub.s32 q11, q11, q10 \n"// interpolate (step1) 2nd set of coets + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q9, q9, d2[0] \n"// interpolate (step2) 1st set of coefs + "vqrdmulh.s32 q11, q11, d2[0] \n"// interpolate (step2) 2nd set of coefs + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + + "vadd.s32 q8, q8, q9 \n"// interpolate (step3) 1st set + "vadd.s32 q10, q10, q11 \n"// interpolate (step4) 2nd set + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #8 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_MONO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsP1] "+r" (coefsP1), + [coefsN0] "+r" (coefsN), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", + "q8", "q9", "q10", "q11", + "q12", "q14" + ); +} + +template <> +inline +void Process<2, 8>(int32_t* const out, + int count, + const int32_t* coefsP, + const int32_t* coefsN, + const int32_t* coefsP1, + const int32_t* coefsN1, + const int16_t* sP, + const int16_t* sN, + uint32_t lerpP, + const int32_t* const volumeLR) +{ + const int CHANNELS = 2; // template specialization does not preserve params + const int STRIDE = 8; + sP -= CHANNELS*((STRIDE>>1)-1); + asm ( + "vmov.32 d2[0], %[lerpP] \n"// load the positive phase + "veor q0, q0, q0 \n"// result, initialize to 0 + "veor q4, q4, q4 \n"// result, initialize to 0 + + "1: \n" + "vld2.16 {d4, d5}, [%[sP]] \n"// load 4 16-bits stereo samples + "vld2.16 {d6, d7}, [%[sN]]! \n"// load 4 16-bits stereo samples + "vld1.32 {q8}, [%[coefsP0]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q9}, [%[coefsP1]:128]! \n"// load 4 32-bits coefs for interpolation + "vld1.32 {q10}, [%[coefsN1]:128]! \n"// load 4 32-bits coefs + "vld1.32 {q11}, [%[coefsN0]:128]! \n"// load 4 32-bits coefs for interpolation + + "vrev64.16 q2, q2 \n"// (reversed) 2 frames of the positive side + + "vsub.s32 q9, q9, q8 \n"// interpolate (step1) 1st set of coefs + "vsub.s32 q11, q11, q10 \n"// interpolate (step1) 2nd set of coets + "vshll.s16 q12, d4, #15 \n"// extend samples to 31 bits + "vshll.s16 q13, d5, #15 \n"// extend samples to 31 bits + + "vqrdmulh.s32 q9, q9, d2[0] \n"// interpolate (step2) 1st set of coefs + "vqrdmulh.s32 q11, q11, d2[1] \n"// interpolate (step3) 2nd set of coefs + "vshll.s16 q14, d6, #15 \n"// extend samples to 31 bits + "vshll.s16 q15, d7, #15 \n"// extend samples to 31 bits + + "vadd.s32 q8, q8, q9 \n"// interpolate (step3) 1st set + "vadd.s32 q10, q10, q11 \n"// interpolate (step4) 2nd set + + "vqrdmulh.s32 q12, q12, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q13, q13, q8 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q14, q14, q10 \n"// multiply samples by interpolated coef + "vqrdmulh.s32 q15, q15, q10 \n"// multiply samples by interpolated coef + + "vadd.s32 q0, q0, q12 \n"// accumulate result + "vadd.s32 q4, q4, q13 \n"// accumulate result + "vadd.s32 q0, q0, q14 \n"// accumulate result + "vadd.s32 q4, q4, q15 \n"// accumulate result + + "subs %[count], %[count], #4 \n"// update loop counter + "sub %[sP], %[sP], #16 \n"// move pointer to next set of samples + + "bne 1b \n"// loop + + ASSEMBLY_ACCUMULATE_STEREO + + : [out] "=Uv" (out[0]), + [count] "+r" (count), + [coefsP0] "+r" (coefsP), + [coefsP1] "+r" (coefsP1), + [coefsN0] "+r" (coefsN), + [coefsN1] "+r" (coefsN1), + [sP] "+r" (sP), + [sN] "+r" (sN) + : [lerpP] "r" (lerpP), + [vLR] "r" (volumeLR) + : "cc", "memory", + "q0", "q1", "q2", "q3", "q4", + "q8", "q9", "q10", "q11", + "q12", "q13", "q14", "q15" + ); +} + +#endif //USE_NEON + +}; // namespace android + +#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_NEON_H*/ diff --git a/services/audioflinger/Effects.cpp b/services/audioflinger/Effects.cpp index 010e233..29b56db 100644 --- a/services/audioflinger/Effects.cpp +++ b/services/audioflinger/Effects.cpp @@ -116,8 +116,9 @@ status_t AudioFlinger::EffectModule::addHandle(EffectHandle *handle) continue; } // first non destroyed handle is considered in control - if (controlHandle == NULL) + if (controlHandle == NULL) { controlHandle = h; + } if (h->priority() <= priority) { break; } @@ -804,7 +805,112 @@ bool AudioFlinger::EffectModule::isOffloaded() const return mOffloaded; } -void AudioFlinger::EffectModule::dump(int fd, const Vector<String16>& args) +String8 effectFlagsToString(uint32_t flags) { + String8 s; + + s.append("conn. mode: "); + switch (flags & EFFECT_FLAG_TYPE_MASK) { + case EFFECT_FLAG_TYPE_INSERT: s.append("insert"); break; + case EFFECT_FLAG_TYPE_AUXILIARY: s.append("auxiliary"); break; + case EFFECT_FLAG_TYPE_REPLACE: s.append("replace"); break; + case EFFECT_FLAG_TYPE_PRE_PROC: s.append("preproc"); break; + case EFFECT_FLAG_TYPE_POST_PROC: s.append("postproc"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + + s.append("insert pref: "); + switch (flags & EFFECT_FLAG_INSERT_MASK) { + case EFFECT_FLAG_INSERT_ANY: s.append("any"); break; + case EFFECT_FLAG_INSERT_FIRST: s.append("first"); break; + case EFFECT_FLAG_INSERT_LAST: s.append("last"); break; + case EFFECT_FLAG_INSERT_EXCLUSIVE: s.append("exclusive"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + + s.append("volume mgmt: "); + switch (flags & EFFECT_FLAG_VOLUME_MASK) { + case EFFECT_FLAG_VOLUME_NONE: s.append("none"); break; + case EFFECT_FLAG_VOLUME_CTRL: s.append("implements control"); break; + case EFFECT_FLAG_VOLUME_IND: s.append("requires indication"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + + uint32_t devind = flags & EFFECT_FLAG_DEVICE_MASK; + if (devind) { + s.append("device indication: "); + switch (devind) { + case EFFECT_FLAG_DEVICE_IND: s.append("requires updates"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + } + + s.append("input mode: "); + switch (flags & EFFECT_FLAG_INPUT_MASK) { + case EFFECT_FLAG_INPUT_DIRECT: s.append("direct"); break; + case EFFECT_FLAG_INPUT_PROVIDER: s.append("provider"); break; + case EFFECT_FLAG_INPUT_BOTH: s.append("direct+provider"); break; + default: s.append("not set"); break; + } + s.append(", "); + + s.append("output mode: "); + switch (flags & EFFECT_FLAG_OUTPUT_MASK) { + case EFFECT_FLAG_OUTPUT_DIRECT: s.append("direct"); break; + case EFFECT_FLAG_OUTPUT_PROVIDER: s.append("provider"); break; + case EFFECT_FLAG_OUTPUT_BOTH: s.append("direct+provider"); break; + default: s.append("not set"); break; + } + s.append(", "); + + uint32_t accel = flags & EFFECT_FLAG_HW_ACC_MASK; + if (accel) { + s.append("hardware acceleration: "); + switch (accel) { + case EFFECT_FLAG_HW_ACC_SIMPLE: s.append("non-tunneled"); break; + case EFFECT_FLAG_HW_ACC_TUNNEL: s.append("tunneled"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + } + + uint32_t modeind = flags & EFFECT_FLAG_AUDIO_MODE_MASK; + if (modeind) { + s.append("mode indication: "); + switch (modeind) { + case EFFECT_FLAG_AUDIO_MODE_IND: s.append("required"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + } + + uint32_t srcind = flags & EFFECT_FLAG_AUDIO_SOURCE_MASK; + if (srcind) { + s.append("source indication: "); + switch (srcind) { + case EFFECT_FLAG_AUDIO_SOURCE_IND: s.append("required"); break; + default: s.append("unknown/reserved"); break; + } + s.append(", "); + } + + if (flags & EFFECT_FLAG_OFFLOAD_MASK) { + s.append("offloadable, "); + } + + int len = s.length(); + if (s.length() > 2) { + char *str = s.lockBuffer(len); + s.unlockBuffer(len - 2); + } + return s; +} + + +void AudioFlinger::EffectModule::dump(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; @@ -838,9 +944,10 @@ void AudioFlinger::EffectModule::dump(int fd, const Vector<String16>& args) mDescriptor.type.node[2], mDescriptor.type.node[3],mDescriptor.type.node[4],mDescriptor.type.node[5]); result.append(buffer); - snprintf(buffer, SIZE, "\t\t- apiVersion: %08X\n\t\t- flags: %08X\n", + snprintf(buffer, SIZE, "\t\t- apiVersion: %08X\n\t\t- flags: %08X (%s)\n", mDescriptor.apiVersion, - mDescriptor.flags); + mDescriptor.flags, + effectFlagsToString(mDescriptor.flags).string()); result.append(buffer); snprintf(buffer, SIZE, "\t\t- name: %s\n", mDescriptor.name); @@ -851,37 +958,37 @@ void AudioFlinger::EffectModule::dump(int fd, const Vector<String16>& args) result.append("\t\t- Input configuration:\n"); result.append("\t\t\tFrames Smp rate Channels Format Buffer\n"); - snprintf(buffer, SIZE, "\t\t\t%05zu %05d %08x %6d %p\n", + snprintf(buffer, SIZE, "\t\t\t%05zu %05d %08x %6d (%s) %p\n", mConfig.inputCfg.buffer.frameCount, mConfig.inputCfg.samplingRate, mConfig.inputCfg.channels, mConfig.inputCfg.format, + formatToString((audio_format_t)mConfig.inputCfg.format), mConfig.inputCfg.buffer.raw); result.append(buffer); result.append("\t\t- Output configuration:\n"); result.append("\t\t\tBuffer Frames Smp rate Channels Format\n"); - snprintf(buffer, SIZE, "\t\t\t%p %05zu %05d %08x %d\n", + snprintf(buffer, SIZE, "\t\t\t%p %05zu %05d %08x %d (%s)\n", mConfig.outputCfg.buffer.raw, mConfig.outputCfg.buffer.frameCount, mConfig.outputCfg.samplingRate, mConfig.outputCfg.channels, - mConfig.outputCfg.format); + mConfig.outputCfg.format, + formatToString((audio_format_t)mConfig.outputCfg.format)); result.append(buffer); snprintf(buffer, SIZE, "\t\t%zu Clients:\n", mHandles.size()); result.append(buffer); - result.append("\t\t\tPid Priority Ctrl Locked client server\n"); + result.append("\t\t\t Pid Priority Ctrl Locked client server\n"); for (size_t i = 0; i < mHandles.size(); ++i) { EffectHandle *handle = mHandles[i]; if (handle != NULL && !handle->destroyed_l()) { - handle->dump(buffer, SIZE); + handle->dumpToBuffer(buffer, SIZE); result.append(buffer); } } - result.append("\n"); - write(fd, result.string(), result.length()); if (locked) { @@ -911,18 +1018,15 @@ AudioFlinger::EffectHandle::EffectHandle(const sp<EffectModule>& effect, } int bufOffset = ((sizeof(effect_param_cblk_t) - 1) / sizeof(int) + 1) * sizeof(int); mCblkMemory = client->heap()->allocate(EFFECT_PARAM_BUFFER_SIZE + bufOffset); - if (mCblkMemory != 0) { - mCblk = static_cast<effect_param_cblk_t *>(mCblkMemory->pointer()); - - if (mCblk != NULL) { - new(mCblk) effect_param_cblk_t(); - mBuffer = (uint8_t *)mCblk + bufOffset; - } - } else { + if (mCblkMemory == 0 || + (mCblk = static_cast<effect_param_cblk_t *>(mCblkMemory->pointer())) == NULL) { ALOGE("not enough memory for Effect size=%u", EFFECT_PARAM_BUFFER_SIZE + sizeof(effect_param_cblk_t)); + mCblkMemory.clear(); return; } + new(mCblk) effect_param_cblk_t(); + mBuffer = (uint8_t *)mCblk + bufOffset; } AudioFlinger::EffectHandle::~EffectHandle() @@ -939,6 +1043,11 @@ AudioFlinger::EffectHandle::~EffectHandle() disconnect(false); } +status_t AudioFlinger::EffectHandle::initCheck() +{ + return mClient == 0 || mCblkMemory != 0 ? OK : NO_MEMORY; +} + status_t AudioFlinger::EffectHandle::enable() { ALOGV("enable %p", this); @@ -1179,15 +1288,15 @@ status_t AudioFlinger::EffectHandle::onTransact( } -void AudioFlinger::EffectHandle::dump(char* buffer, size_t size) +void AudioFlinger::EffectHandle::dumpToBuffer(char* buffer, size_t size) { bool locked = mCblk != NULL && AudioFlinger::dumpTryLock(mCblk->lock); - snprintf(buffer, size, "\t\t\t%05d %05d %01u %01u %05u %05u\n", + snprintf(buffer, size, "\t\t\t%5d %5d %3s %3s %5u %5u\n", (mClient == 0) ? getpid_cached : mClient->pid(), mPriority, - mHasControl, - !locked, + mHasControl ? "yes" : "no", + locked ? "yes" : "no", mCblk ? mCblk->clientIndex : 0, mCblk ? mCblk->serverIndex : 0 ); @@ -1568,33 +1677,35 @@ void AudioFlinger::EffectChain::dump(int fd, const Vector<String16>& args) char buffer[SIZE]; String8 result; - snprintf(buffer, SIZE, "Effects for session %d:\n", mSessionId); + size_t numEffects = mEffects.size(); + snprintf(buffer, SIZE, " %d effects for session %d\n", numEffects, mSessionId); result.append(buffer); - bool locked = AudioFlinger::dumpTryLock(mLock); - // failed to lock - AudioFlinger is probably deadlocked - if (!locked) { - result.append("\tCould not lock mutex:\n"); - } + if (numEffects) { + bool locked = AudioFlinger::dumpTryLock(mLock); + // failed to lock - AudioFlinger is probably deadlocked + if (!locked) { + result.append("\tCould not lock mutex:\n"); + } - result.append("\tNum fx In buffer Out buffer Active tracks:\n"); - snprintf(buffer, SIZE, "\t%02zu %p %p %d\n", - mEffects.size(), - mInBuffer, - mOutBuffer, - mActiveTrackCnt); - result.append(buffer); - write(fd, result.string(), result.size()); + result.append("\tIn buffer Out buffer Active tracks:\n"); + snprintf(buffer, SIZE, "\t%p %p %d\n", + mInBuffer, + mOutBuffer, + mActiveTrackCnt); + result.append(buffer); + write(fd, result.string(), result.size()); - for (size_t i = 0; i < mEffects.size(); ++i) { - sp<EffectModule> effect = mEffects[i]; - if (effect != 0) { - effect->dump(fd, args); + for (size_t i = 0; i < numEffects; ++i) { + sp<EffectModule> effect = mEffects[i]; + if (effect != 0) { + effect->dump(fd, args); + } } - } - if (locked) { - mLock.unlock(); + if (locked) { + mLock.unlock(); + } } } diff --git a/services/audioflinger/Effects.h b/services/audioflinger/Effects.h index b717857..ccc4825 100644 --- a/services/audioflinger/Effects.h +++ b/services/audioflinger/Effects.h @@ -169,6 +169,7 @@ public: const sp<IEffectClient>& effectClient, int32_t priority); virtual ~EffectHandle(); + virtual status_t initCheck(); // IEffect virtual status_t enable(); @@ -208,7 +209,7 @@ public: // destroyed_l() must be called with the associated EffectModule mLock held bool destroyed_l() const { return mDestroyed; } - void dump(char* buffer, size_t size); + void dumpToBuffer(char* buffer, size_t size); protected: friend class AudioFlinger; // for mEffect, mHasControl, mEnabled diff --git a/services/audioflinger/FastMixer.cpp b/services/audioflinger/FastMixer.cpp index 85d637e..90122e0 100644 --- a/services/audioflinger/FastMixer.cpp +++ b/services/audioflinger/FastMixer.cpp @@ -238,7 +238,7 @@ bool FastMixer::threadLoop() } } - if ((format != previousFormat) || (frameCount != previous->mFrameCount)) { + if ((!Format_isEqual(format, previousFormat)) || (frameCount != previous->mFrameCount)) { // FIXME to avoid priority inversion, don't delete here delete mixer; mixer = NULL; @@ -440,8 +440,9 @@ bool FastMixer::threadLoop() } int64_t pts; - if (outputSink == NULL || (OK != outputSink->getNextWriteTimestamp(&pts))) + if (outputSink == NULL || (OK != outputSink->getNextWriteTimestamp(&pts))) { pts = AudioBufferProvider::kInvalidPTS; + } // process() is CPU-bound mixer->process(pts); @@ -695,7 +696,7 @@ static int compare_uint32_t(const void *pa, const void *pb) void FastMixerDumpState::dump(int fd) const { if (mCommand == FastMixerState::INITIAL) { - fdprintf(fd, "FastMixer not initialized\n"); + fdprintf(fd, " FastMixer not initialized\n"); return; } #define COMMAND_MAX 32 @@ -729,10 +730,10 @@ void FastMixerDumpState::dump(int fd) const double measuredWarmupMs = (mMeasuredWarmupTs.tv_sec * 1000.0) + (mMeasuredWarmupTs.tv_nsec / 1000000.0); double mixPeriodSec = (double) mFrameCount / (double) mSampleRate; - fdprintf(fd, "FastMixer command=%s writeSequence=%u framesWritten=%u\n" - " numTracks=%u writeErrors=%u underruns=%u overruns=%u\n" - " sampleRate=%u frameCount=%zu measuredWarmup=%.3g ms, warmupCycles=%u\n" - " mixPeriod=%.2f ms\n", + fdprintf(fd, " FastMixer command=%s writeSequence=%u framesWritten=%u\n" + " numTracks=%u writeErrors=%u underruns=%u overruns=%u\n" + " sampleRate=%u frameCount=%zu measuredWarmup=%.3g ms, warmupCycles=%u\n" + " mixPeriod=%.2f ms\n", string, mWriteSequence, mFramesWritten, mNumTracks, mWriteErrors, mUnderruns, mOverruns, mSampleRate, mFrameCount, measuredWarmupMs, mWarmupCycles, @@ -783,14 +784,20 @@ void FastMixerDumpState::dump(int fd) const previousCpukHz = sampleCpukHz; #endif } - fdprintf(fd, "Simple moving statistics over last %.1f seconds:\n", wall.n() * mixPeriodSec); - fdprintf(fd, " wall clock time in ms per mix cycle:\n" - " mean=%.2f min=%.2f max=%.2f stddev=%.2f\n", - wall.mean()*1e-6, wall.minimum()*1e-6, wall.maximum()*1e-6, wall.stddev()*1e-6); - fdprintf(fd, " raw CPU load in us per mix cycle:\n" - " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", - loadNs.mean()*1e-3, loadNs.minimum()*1e-3, loadNs.maximum()*1e-3, - loadNs.stddev()*1e-3); + if (n) { + fdprintf(fd, " Simple moving statistics over last %.1f seconds:\n", + wall.n() * mixPeriodSec); + fdprintf(fd, " wall clock time in ms per mix cycle:\n" + " mean=%.2f min=%.2f max=%.2f stddev=%.2f\n", + wall.mean()*1e-6, wall.minimum()*1e-6, wall.maximum()*1e-6, + wall.stddev()*1e-6); + fdprintf(fd, " raw CPU load in us per mix cycle:\n" + " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", + loadNs.mean()*1e-3, loadNs.minimum()*1e-3, loadNs.maximum()*1e-3, + loadNs.stddev()*1e-3); + } else { + fdprintf(fd, " No FastMixer statistics available currently\n"); + } #ifdef CPU_FREQUENCY_STATISTICS fdprintf(fd, " CPU clock frequency in MHz:\n" " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", @@ -808,9 +815,9 @@ void FastMixerDumpState::dump(int fd) const left.sample(tail[i]); right.sample(tail[n - (i + 1)]); } - fdprintf(fd, "Distribution of mix cycle times in ms for the tails (> ~3 stddev outliers):\n" - " left tail: mean=%.2f min=%.2f max=%.2f stddev=%.2f\n" - " right tail: mean=%.2f min=%.2f max=%.2f stddev=%.2f\n", + fdprintf(fd, " Distribution of mix cycle times in ms for the tails (> ~3 stddev outliers):\n" + " left tail: mean=%.2f min=%.2f max=%.2f stddev=%.2f\n" + " right tail: mean=%.2f min=%.2f max=%.2f stddev=%.2f\n", left.mean()*1e-6, left.minimum()*1e-6, left.maximum()*1e-6, left.stddev()*1e-6, right.mean()*1e-6, right.minimum()*1e-6, right.maximum()*1e-6, right.stddev()*1e-6); @@ -823,9 +830,9 @@ void FastMixerDumpState::dump(int fd) const // Instead we always display all tracks, with an indication // of whether we think the track is active. uint32_t trackMask = mTrackMask; - fdprintf(fd, "Fast tracks: kMaxFastTracks=%u activeMask=%#x\n", + fdprintf(fd, " Fast tracks: kMaxFastTracks=%u activeMask=%#x\n", FastMixerState::kMaxFastTracks, trackMask); - fdprintf(fd, "Index Active Full Partial Empty Recent Ready\n"); + fdprintf(fd, " Index Active Full Partial Empty Recent Ready\n"); for (uint32_t i = 0; i < FastMixerState::kMaxFastTracks; ++i, trackMask >>= 1) { bool isActive = trackMask & 1; const FastTrackDump *ftDump = &mTracks[i]; @@ -845,7 +852,7 @@ void FastMixerDumpState::dump(int fd) const mostRecent = "?"; break; } - fdprintf(fd, "%5u %6s %4u %7u %5u %7s %5zu\n", i, isActive ? "yes" : "no", + fdprintf(fd, " %5u %6s %4u %7u %5u %7s %5zu\n", i, isActive ? "yes" : "no", (underruns.mBitFields.mFull) & UNDERRUN_MASK, (underruns.mBitFields.mPartial) & UNDERRUN_MASK, (underruns.mBitFields.mEmpty) & UNDERRUN_MASK, diff --git a/services/audioflinger/PlaybackTracks.h b/services/audioflinger/PlaybackTracks.h index 43b77f3..b5e763d 100644 --- a/services/audioflinger/PlaybackTracks.h +++ b/services/audioflinger/PlaybackTracks.h @@ -34,9 +34,10 @@ public: int uid, IAudioFlinger::track_flags_t flags); virtual ~Track(); + virtual status_t initCheck() const; static void appendDumpHeader(String8& result); - void dump(char* buffer, size_t size); + void dump(char* buffer, size_t size, bool active); virtual status_t start(AudioSystem::sync_event_t event = AudioSystem::SYNC_EVENT_NONE, int triggerSession = 0); @@ -93,6 +94,8 @@ protected: bool isReady() const; void setPaused() { mState = PAUSED; } void reset(); + bool isFlushPending() const { return mFlushHwPending; } + void flushAck(); bool isOutputTrack() const { return (mStreamType == AUDIO_STREAM_CNT); @@ -154,6 +157,7 @@ private: bool mIsInvalid; // non-resettable latch, set by invalidate() AudioTrackServerProxy* mAudioTrackServerProxy; bool mResumeToStopping; // track was paused in stopping state. + bool mFlushHwPending; // track requests for thread flush }; // end of Track class TimedTrack : public Track { diff --git a/services/audioflinger/RecordTracks.h b/services/audioflinger/RecordTracks.h index 57de568..fc3171f 100644 --- a/services/audioflinger/RecordTracks.h +++ b/services/audioflinger/RecordTracks.h @@ -45,7 +45,7 @@ public: return tmp; } static void appendDumpHeader(String8& result); - void dump(char* buffer, size_t size); + void dump(char* buffer, size_t size, bool active); private: friend class AudioFlinger; // for mState @@ -59,5 +59,4 @@ private: // releaseBuffer() not overridden bool mOverflow; // overflow on most recent attempt to fill client buffer - AudioRecordServerProxy* mAudioRecordServerProxy; }; diff --git a/services/audioflinger/Threads.cpp b/services/audioflinger/Threads.cpp index 498ddb6..b064e89 100644 --- a/services/audioflinger/Threads.cpp +++ b/services/audioflinger/Threads.cpp @@ -185,7 +185,11 @@ CpuStats::CpuStats() { } -void CpuStats::sample(const String8 &title) { +void CpuStats::sample(const String8 &title +#ifndef DEBUG_CPU_USAGE + __unused +#endif + ) { #ifdef DEBUG_CPU_USAGE // get current thread's delta CPU time in wall clock ns double wcNs; @@ -269,8 +273,8 @@ AudioFlinger::ThreadBase::ThreadBase(const sp<AudioFlinger>& audioFlinger, audio : Thread(false /*canCallJava*/), mType(type), mAudioFlinger(audioFlinger), - // mSampleRate, mFrameCount, mChannelMask, mChannelCount, mFrameSize, and mFormat are - // set by PlaybackThread::readOutputParameters() or RecordThread::readInputParameters() + // mSampleRate, mFrameCount, mChannelMask, mChannelCount, mFrameSize, mFormat, mBufferSize + // are set by PlaybackThread::readOutputParameters() or RecordThread::readInputParameters() mParamStatus(NO_ERROR), //FIXME: mStandby should be true here. Is this some kind of hack? mStandby(false), mOutDevice(outDevice), mInDevice(inDevice), @@ -297,6 +301,17 @@ AudioFlinger::ThreadBase::~ThreadBase() } } +status_t AudioFlinger::ThreadBase::readyToRun() +{ + status_t status = initCheck(); + if (status == NO_ERROR) { + ALOGI("AudioFlinger's thread %p ready to run", this); + } else { + ALOGE("No working audio driver found."); + } + return status; +} + void AudioFlinger::ThreadBase::exit() { ALOGV("ThreadBase::exit"); @@ -369,7 +384,13 @@ void AudioFlinger::ThreadBase::sendPrioConfigEvent_l(pid_t pid, pid_t tid, int32 void AudioFlinger::ThreadBase::processConfigEvents() { - mLock.lock(); + Mutex::Autolock _l(mLock); + processConfigEvents_l(); +} + +// post condition: mConfigEvents.isEmpty() +void AudioFlinger::ThreadBase::processConfigEvents_l() +{ while (!mConfigEvents.isEmpty()) { ALOGV("processConfigEvents() remaining events %d", mConfigEvents.size()); ConfigEvent *event = mConfigEvents[0]; @@ -377,35 +398,81 @@ void AudioFlinger::ThreadBase::processConfigEvents() // release mLock before locking AudioFlinger mLock: lock order is always // AudioFlinger then ThreadBase to avoid cross deadlock mLock.unlock(); - switch(event->type()) { - case CFG_EVENT_PRIO: { - PrioConfigEvent *prioEvent = static_cast<PrioConfigEvent *>(event); - // FIXME Need to understand why this has be done asynchronously - int err = requestPriority(prioEvent->pid(), prioEvent->tid(), prioEvent->prio(), - true /*asynchronous*/); - if (err != 0) { - ALOGW("Policy SCHED_FIFO priority %d is unavailable for pid %d tid %d; " - "error %d", - prioEvent->prio(), prioEvent->pid(), prioEvent->tid(), err); - } - } break; - case CFG_EVENT_IO: { - IoConfigEvent *ioEvent = static_cast<IoConfigEvent *>(event); - mAudioFlinger->mLock.lock(); + switch (event->type()) { + case CFG_EVENT_PRIO: { + PrioConfigEvent *prioEvent = static_cast<PrioConfigEvent *>(event); + // FIXME Need to understand why this has be done asynchronously + int err = requestPriority(prioEvent->pid(), prioEvent->tid(), prioEvent->prio(), + true /*asynchronous*/); + if (err != 0) { + ALOGW("Policy SCHED_FIFO priority %d is unavailable for pid %d tid %d; error %d", + prioEvent->prio(), prioEvent->pid(), prioEvent->tid(), err); + } + } break; + case CFG_EVENT_IO: { + IoConfigEvent *ioEvent = static_cast<IoConfigEvent *>(event); + { + Mutex::Autolock _l(mAudioFlinger->mLock); audioConfigChanged_l(ioEvent->event(), ioEvent->param()); - mAudioFlinger->mLock.unlock(); - } break; - default: - ALOGE("processConfigEvents() unknown event type %d", event->type()); - break; + } + } break; + default: + ALOGE("processConfigEvents() unknown event type %d", event->type()); + break; } delete event; mLock.lock(); } - mLock.unlock(); } -void AudioFlinger::ThreadBase::dumpBase(int fd, const Vector<String16>& args) +String8 channelMaskToString(audio_channel_mask_t mask, bool output) { + String8 s; + if (output) { + if (mask & AUDIO_CHANNEL_OUT_FRONT_LEFT) s.append("front-left, "); + if (mask & AUDIO_CHANNEL_OUT_FRONT_RIGHT) s.append("front-right, "); + if (mask & AUDIO_CHANNEL_OUT_FRONT_CENTER) s.append("front-center, "); + if (mask & AUDIO_CHANNEL_OUT_LOW_FREQUENCY) s.append("low freq, "); + if (mask & AUDIO_CHANNEL_OUT_BACK_LEFT) s.append("back-left, "); + if (mask & AUDIO_CHANNEL_OUT_BACK_RIGHT) s.append("back-right, "); + if (mask & AUDIO_CHANNEL_OUT_FRONT_LEFT_OF_CENTER) s.append("front-left-of-center, "); + if (mask & AUDIO_CHANNEL_OUT_FRONT_RIGHT_OF_CENTER) s.append("front-right-of-center, "); + if (mask & AUDIO_CHANNEL_OUT_BACK_CENTER) s.append("back-center, "); + if (mask & AUDIO_CHANNEL_OUT_SIDE_LEFT) s.append("side-left, "); + if (mask & AUDIO_CHANNEL_OUT_SIDE_RIGHT) s.append("side-right, "); + if (mask & AUDIO_CHANNEL_OUT_TOP_CENTER) s.append("top-center ,"); + if (mask & AUDIO_CHANNEL_OUT_TOP_FRONT_LEFT) s.append("top-front-left, "); + if (mask & AUDIO_CHANNEL_OUT_TOP_FRONT_CENTER) s.append("top-front-center, "); + if (mask & AUDIO_CHANNEL_OUT_TOP_FRONT_RIGHT) s.append("top-front-right, "); + if (mask & AUDIO_CHANNEL_OUT_TOP_BACK_LEFT) s.append("top-back-left, "); + if (mask & AUDIO_CHANNEL_OUT_TOP_BACK_CENTER) s.append("top-back-center, " ); + if (mask & AUDIO_CHANNEL_OUT_TOP_BACK_RIGHT) s.append("top-back-right, " ); + if (mask & ~AUDIO_CHANNEL_OUT_ALL) s.append("unknown, "); + } else { + if (mask & AUDIO_CHANNEL_IN_LEFT) s.append("left, "); + if (mask & AUDIO_CHANNEL_IN_RIGHT) s.append("right, "); + if (mask & AUDIO_CHANNEL_IN_FRONT) s.append("front, "); + if (mask & AUDIO_CHANNEL_IN_BACK) s.append("back, "); + if (mask & AUDIO_CHANNEL_IN_LEFT_PROCESSED) s.append("left-processed, "); + if (mask & AUDIO_CHANNEL_IN_RIGHT_PROCESSED) s.append("right-processed, "); + if (mask & AUDIO_CHANNEL_IN_FRONT_PROCESSED) s.append("front-processed, "); + if (mask & AUDIO_CHANNEL_IN_BACK_PROCESSED) s.append("back-processed, "); + if (mask & AUDIO_CHANNEL_IN_PRESSURE) s.append("pressure, "); + if (mask & AUDIO_CHANNEL_IN_X_AXIS) s.append("X, "); + if (mask & AUDIO_CHANNEL_IN_Y_AXIS) s.append("Y, "); + if (mask & AUDIO_CHANNEL_IN_Z_AXIS) s.append("Z, "); + if (mask & AUDIO_CHANNEL_IN_VOICE_UPLINK) s.append("voice-uplink, "); + if (mask & AUDIO_CHANNEL_IN_VOICE_DNLINK) s.append("voice-dnlink, "); + if (mask & ~AUDIO_CHANNEL_IN_ALL) s.append("unknown, "); + } + int len = s.length(); + if (s.length() > 2) { + char *str = s.lockBuffer(len); + s.unlockBuffer(len - 2); + } + return s; +} + +void AudioFlinger::ThreadBase::dumpBase(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; @@ -413,47 +480,43 @@ void AudioFlinger::ThreadBase::dumpBase(int fd, const Vector<String16>& args) bool locked = AudioFlinger::dumpTryLock(mLock); if (!locked) { - snprintf(buffer, SIZE, "thread %p maybe dead locked\n", this); - write(fd, buffer, strlen(buffer)); - } - - snprintf(buffer, SIZE, "io handle: %d\n", mId); - result.append(buffer); - snprintf(buffer, SIZE, "TID: %d\n", getTid()); - result.append(buffer); - snprintf(buffer, SIZE, "standby: %d\n", mStandby); - result.append(buffer); - snprintf(buffer, SIZE, "Sample rate: %u\n", mSampleRate); - result.append(buffer); - snprintf(buffer, SIZE, "HAL frame count: %zu\n", mFrameCount); - result.append(buffer); - snprintf(buffer, SIZE, "Channel Count: %u\n", mChannelCount); - result.append(buffer); - snprintf(buffer, SIZE, "Channel Mask: 0x%08x\n", mChannelMask); - result.append(buffer); - snprintf(buffer, SIZE, "Format: %d\n", mFormat); - result.append(buffer); - snprintf(buffer, SIZE, "Frame size: %zu\n", mFrameSize); - result.append(buffer); - - snprintf(buffer, SIZE, "\nPending setParameters commands: \n"); - result.append(buffer); - result.append(" Index Command"); - for (size_t i = 0; i < mNewParameters.size(); ++i) { - snprintf(buffer, SIZE, "\n %02zu ", i); - result.append(buffer); - result.append(mNewParameters[i]); + fdprintf(fd, "thread %p maybe dead locked\n", this); + } + + fdprintf(fd, " I/O handle: %d\n", mId); + fdprintf(fd, " TID: %d\n", getTid()); + fdprintf(fd, " Standby: %s\n", mStandby ? "yes" : "no"); + fdprintf(fd, " Sample rate: %u\n", mSampleRate); + fdprintf(fd, " HAL frame count: %zu\n", mFrameCount); + fdprintf(fd, " HAL buffer size: %u bytes\n", mBufferSize); + fdprintf(fd, " Channel Count: %u\n", mChannelCount); + fdprintf(fd, " Channel Mask: 0x%08x (%s)\n", mChannelMask, + channelMaskToString(mChannelMask, mType != RECORD).string()); + fdprintf(fd, " Format: 0x%x (%s)\n", mFormat, formatToString(mFormat)); + fdprintf(fd, " Frame size: %zu\n", mFrameSize); + fdprintf(fd, " Pending setParameters commands:"); + size_t numParams = mNewParameters.size(); + if (numParams) { + fdprintf(fd, "\n Index Command"); + for (size_t i = 0; i < numParams; ++i) { + fdprintf(fd, "\n %02zu ", i); + fdprintf(fd, mNewParameters[i]); + } + fdprintf(fd, "\n"); + } else { + fdprintf(fd, " none\n"); } - - snprintf(buffer, SIZE, "\n\nPending config events: \n"); - result.append(buffer); - for (size_t i = 0; i < mConfigEvents.size(); i++) { - mConfigEvents[i]->dump(buffer, SIZE); - result.append(buffer); + fdprintf(fd, " Pending config events:"); + size_t numConfig = mConfigEvents.size(); + if (numConfig) { + for (size_t i = 0; i < numConfig; i++) { + mConfigEvents[i]->dump(buffer, SIZE); + fdprintf(fd, "\n %s", buffer); + } + fdprintf(fd, "\n"); + } else { + fdprintf(fd, " none\n"); } - result.append("\n"); - - write(fd, result.string(), result.size()); if (locked) { mLock.unlock(); @@ -466,10 +529,11 @@ void AudioFlinger::ThreadBase::dumpEffectChains(int fd, const Vector<String16>& char buffer[SIZE]; String8 result; - snprintf(buffer, SIZE, "\n- %zu Effect Chains:\n", mEffectChains.size()); + size_t numEffectChains = mEffectChains.size(); + snprintf(buffer, SIZE, " %zu Effect Chains\n", numEffectChains); write(fd, buffer, strlen(buffer)); - for (size_t i = 0; i < mEffectChains.size(); ++i) { + for (size_t i = 0; i < numEffectChains; ++i) { sp<EffectChain> chain = mEffectChains[i]; if (chain != 0) { chain->dump(fd, args); @@ -586,7 +650,7 @@ void AudioFlinger::ThreadBase::clearPowerManager() mPowerManager.clear(); } -void AudioFlinger::ThreadBase::PMDeathRecipient::binderDied(const wp<IBinder>& who) +void AudioFlinger::ThreadBase::PMDeathRecipient::binderDied(const wp<IBinder>& who __unused) { sp<ThreadBase> thread = mThread.promote(); if (thread != 0) { @@ -739,8 +803,7 @@ sp<AudioFlinger::EffectHandle> AudioFlinger::ThreadBase::createEffect_l( int sessionId, effect_descriptor_t *desc, int *enabled, - status_t *status - ) + status_t *status) { sp<EffectModule> effect; sp<EffectHandle> handle; @@ -829,7 +892,10 @@ sp<AudioFlinger::EffectHandle> AudioFlinger::ThreadBase::createEffect_l( } // create effect handle and connect it to effect module handle = new EffectHandle(effect, client, effectClient, priority); - lStatus = effect->addHandle(handle.get()); + lStatus = handle->initCheck(); + if (lStatus == OK) { + lStatus = effect->addHandle(handle.get()); + } if (enabled != NULL) { *enabled = (int)effect->isEnabled(); } @@ -850,9 +916,7 @@ Exit: handle.clear(); } - if (status != NULL) { - *status = lStatus; - } + *status = lStatus; return handle; } @@ -1002,7 +1066,7 @@ AudioFlinger::PlaybackThread::PlaybackThread(const sp<AudioFlinger>& audioFlinge type_t type) : ThreadBase(audioFlinger, id, device, AUDIO_DEVICE_NONE, type), mNormalFrameCount(0), mMixBuffer(NULL), - mAllocMixBuffer(NULL), mSuspended(0), mBytesWritten(0), + mSuspended(0), mBytesWritten(0), mActiveTracksGeneration(0), // mStreamTypes[] initialized in constructor body mOutput(output), @@ -1060,7 +1124,7 @@ AudioFlinger::PlaybackThread::PlaybackThread(const sp<AudioFlinger>& audioFlinge AudioFlinger::PlaybackThread::~PlaybackThread() { mAudioFlinger->unregisterWriter(mNBLogWriter); - delete [] mAllocMixBuffer; + delete[] mMixBuffer; } void AudioFlinger::PlaybackThread::dump(int fd, const Vector<String16>& args) @@ -1070,13 +1134,13 @@ void AudioFlinger::PlaybackThread::dump(int fd, const Vector<String16>& args) dumpEffectChains(fd, args); } -void AudioFlinger::PlaybackThread::dumpTracks(int fd, const Vector<String16>& args) +void AudioFlinger::PlaybackThread::dumpTracks(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; String8 result; - result.appendFormat("Output thread %p stream volumes in dB:\n ", this); + result.appendFormat(" Stream volumes in dB: "); for (int i = 0; i < AUDIO_STREAM_CNT; ++i) { const stream_type_t *st = &mStreamTypes[i]; if (i > 0) { @@ -1091,75 +1155,67 @@ void AudioFlinger::PlaybackThread::dumpTracks(int fd, const Vector<String16>& ar write(fd, result.string(), result.length()); result.clear(); - snprintf(buffer, SIZE, "Output thread %p tracks\n", this); - result.append(buffer); - Track::appendDumpHeader(result); - for (size_t i = 0; i < mTracks.size(); ++i) { - sp<Track> track = mTracks[i]; - if (track != 0) { - track->dump(buffer, SIZE); - result.append(buffer); + // These values are "raw"; they will wrap around. See prepareTracks_l() for a better way. + FastTrackUnderruns underruns = getFastTrackUnderruns(0); + fdprintf(fd, " Normal mixer raw underrun counters: partial=%u empty=%u\n", + underruns.mBitFields.mPartial, underruns.mBitFields.mEmpty); + + size_t numtracks = mTracks.size(); + size_t numactive = mActiveTracks.size(); + fdprintf(fd, " %d Tracks", numtracks); + size_t numactiveseen = 0; + if (numtracks) { + fdprintf(fd, " of which %d are active\n", numactive); + Track::appendDumpHeader(result); + for (size_t i = 0; i < numtracks; ++i) { + sp<Track> track = mTracks[i]; + if (track != 0) { + bool active = mActiveTracks.indexOf(track) >= 0; + if (active) { + numactiveseen++; + } + track->dump(buffer, SIZE, active); + result.append(buffer); + } } + } else { + result.append("\n"); } - - snprintf(buffer, SIZE, "Output thread %p active tracks\n", this); - result.append(buffer); - Track::appendDumpHeader(result); - for (size_t i = 0; i < mActiveTracks.size(); ++i) { - sp<Track> track = mActiveTracks[i].promote(); - if (track != 0) { - track->dump(buffer, SIZE); - result.append(buffer); + if (numactiveseen != numactive) { + // some tracks in the active list were not in the tracks list + snprintf(buffer, SIZE, " The following tracks are in the active list but" + " not in the track list\n"); + result.append(buffer); + Track::appendDumpHeader(result); + for (size_t i = 0; i < numactive; ++i) { + sp<Track> track = mActiveTracks[i].promote(); + if (track != 0 && mTracks.indexOf(track) < 0) { + track->dump(buffer, SIZE, true); + result.append(buffer); + } } } + write(fd, result.string(), result.size()); - // These values are "raw"; they will wrap around. See prepareTracks_l() for a better way. - FastTrackUnderruns underruns = getFastTrackUnderruns(0); - fdprintf(fd, "Normal mixer raw underrun counters: partial=%u empty=%u\n", - underruns.mBitFields.mPartial, underruns.mBitFields.mEmpty); } void AudioFlinger::PlaybackThread::dumpInternals(int fd, const Vector<String16>& args) { - const size_t SIZE = 256; - char buffer[SIZE]; - String8 result; - - snprintf(buffer, SIZE, "\nOutput thread %p internals\n", this); - result.append(buffer); - snprintf(buffer, SIZE, "Normal frame count: %zu\n", mNormalFrameCount); - result.append(buffer); - snprintf(buffer, SIZE, "last write occurred (msecs): %llu\n", - ns2ms(systemTime() - mLastWriteTime)); - result.append(buffer); - snprintf(buffer, SIZE, "total writes: %d\n", mNumWrites); - result.append(buffer); - snprintf(buffer, SIZE, "delayed writes: %d\n", mNumDelayedWrites); - result.append(buffer); - snprintf(buffer, SIZE, "blocked in write: %d\n", mInWrite); - result.append(buffer); - snprintf(buffer, SIZE, "suspend count: %d\n", mSuspended); - result.append(buffer); - snprintf(buffer, SIZE, "mix buffer : %p\n", mMixBuffer); - result.append(buffer); - write(fd, result.string(), result.size()); - fdprintf(fd, "Fast track availMask=%#x\n", mFastTrackAvailMask); + fdprintf(fd, "\nOutput thread %p:\n", this); + fdprintf(fd, " Normal frame count: %zu\n", mNormalFrameCount); + fdprintf(fd, " Last write occurred (msecs): %llu\n", ns2ms(systemTime() - mLastWriteTime)); + fdprintf(fd, " Total writes: %d\n", mNumWrites); + fdprintf(fd, " Delayed writes: %d\n", mNumDelayedWrites); + fdprintf(fd, " Blocked in write: %s\n", mInWrite ? "yes" : "no"); + fdprintf(fd, " Suspend count: %d\n", mSuspended); + fdprintf(fd, " Mix buffer : %p\n", mMixBuffer); + fdprintf(fd, " Fast track availMask=%#x\n", mFastTrackAvailMask); dumpBase(fd, args); } // Thread virtuals -status_t AudioFlinger::PlaybackThread::readyToRun() -{ - status_t status = initCheck(); - if (status == NO_ERROR) { - ALOGI("AudioFlinger's thread %p ready to run", this); - } else { - ALOGE("No working audio driver found."); - } - return status; -} void AudioFlinger::PlaybackThread::onFirstRef() { @@ -1182,7 +1238,7 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *pFrameCount, const sp<IMemory>& sharedBuffer, int sessionId, IAudioFlinger::track_flags_t *flags, @@ -1190,6 +1246,7 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac int uid, status_t *status) { + size_t frameCount = *pFrameCount; sp<Track> track; status_t lStatus; @@ -1256,12 +1313,13 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac } } } + *pFrameCount = frameCount; if (mType == DIRECT) { if ((format & AUDIO_FORMAT_MAIN_MASK) == AUDIO_FORMAT_PCM) { if (sampleRate != mSampleRate || format != mFormat || channelMask != mChannelMask) { - ALOGE("createTrack_l() Bad parameter: sampleRate %u format %d, channelMask 0x%08x " - "for output %p with format %d", + ALOGE("createTrack_l() Bad parameter: sampleRate %u format %#x, channelMask 0x%08x " + "for output %p with format %#x", sampleRate, format, channelMask, mOutput, mFormat); lStatus = BAD_VALUE; goto Exit; @@ -1269,16 +1327,16 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac } } else if (mType == OFFLOAD) { if (sampleRate != mSampleRate || format != mFormat || channelMask != mChannelMask) { - ALOGE("createTrack_l() Bad parameter: sampleRate %d format %d, channelMask 0x%08x \"" - "for output %p with format %d", + ALOGE("createTrack_l() Bad parameter: sampleRate %d format %#x, channelMask 0x%08x \"" + "for output %p with format %#x", sampleRate, format, channelMask, mOutput, mFormat); lStatus = BAD_VALUE; goto Exit; } } else { if ((format & AUDIO_FORMAT_MAIN_MASK) != AUDIO_FORMAT_PCM) { - ALOGE("createTrack_l() Bad parameter: format %d \"" - "for output %p with format %d", + ALOGE("createTrack_l() Bad parameter: format %#x \"" + "for output %p with format %#x", format, mOutput, mFormat); lStatus = BAD_VALUE; goto Exit; @@ -1324,8 +1382,13 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac track = TimedTrack::create(this, client, streamType, sampleRate, format, channelMask, frameCount, sharedBuffer, sessionId, uid); } - if (track == 0 || track->getCblk() == NULL || track->name() < 0) { - lStatus = NO_MEMORY; + + // new Track always returns non-NULL, + // but TimedTrack::create() is a factory that could fail by returning NULL + lStatus = track != 0 ? track->initCheck() : (status_t) NO_MEMORY; + if (lStatus != NO_ERROR) { + ALOGE("createTrack_l() initCheck failed %d; no control block?", lStatus); + // track must be cleared from the caller as the caller has the AF lock goto Exit; } @@ -1350,9 +1413,7 @@ sp<AudioFlinger::PlaybackThread::Track> AudioFlinger::PlaybackThread::createTrac lStatus = NO_ERROR; Exit: - if (status) { - *status = lStatus; - } + *status = lStatus; return track; } @@ -1471,9 +1532,7 @@ status_t AudioFlinger::PlaybackThread::addTrack_l(const sp<Track>& track) status = NO_ERROR; } - ALOGV("signal playback thread"); - broadcast_l(); - + onAddNewTrack_l(); return status; } @@ -1599,7 +1658,7 @@ void AudioFlinger::PlaybackThread::resetDraining(uint32_t sequence) // static int AudioFlinger::PlaybackThread::asyncCallback(stream_callback_event_t event, - void *param, + void *param __unused, void *cookie) { AudioFlinger::PlaybackThread *me = (AudioFlinger::PlaybackThread *)cookie; @@ -1633,14 +1692,15 @@ void AudioFlinger::PlaybackThread::readOutputParameters() mChannelCount = popcount(mChannelMask); mFormat = mOutput->stream->common.get_format(&mOutput->stream->common); if (!audio_is_valid_format(mFormat)) { - LOG_FATAL("HAL format %d not valid for output", mFormat); + LOG_FATAL("HAL format %#x not valid for output", mFormat); } if ((mType == MIXER || mType == DUPLICATING) && mFormat != AUDIO_FORMAT_PCM_16_BIT) { - LOG_FATAL("HAL format %d not supported for mixed output; must be AUDIO_FORMAT_PCM_16_BIT", + LOG_FATAL("HAL format %#x not supported for mixed output; must be AUDIO_FORMAT_PCM_16_BIT", mFormat); } mFrameSize = audio_stream_frame_size(&mOutput->stream->common); - mFrameCount = mOutput->stream->common.get_buffer_size(&mOutput->stream->common) / mFrameSize; + mBufferSize = mOutput->stream->common.get_buffer_size(&mOutput->stream->common); + mFrameCount = mBufferSize / mFrameSize; if (mFrameCount & 15) { ALOGW("HAL output buffer size is %u frames but AudioMixer requires multiples of 16 frames", mFrameCount); @@ -1697,11 +1757,11 @@ void AudioFlinger::PlaybackThread::readOutputParameters() ALOGI("HAL output buffer size %u frames, normal mix buffer size %u frames", mFrameCount, mNormalFrameCount); - delete[] mAllocMixBuffer; - size_t align = (mFrameSize < sizeof(int16_t)) ? sizeof(int16_t) : mFrameSize; - mAllocMixBuffer = new int8_t[mNormalFrameCount * mFrameSize + align - 1]; - mMixBuffer = (int16_t *) ((((size_t)mAllocMixBuffer + align - 1) / align) * align); - memset(mMixBuffer, 0, mNormalFrameCount * mFrameSize); + delete[] mMixBuffer; + size_t normalBufferSize = mNormalFrameCount * mFrameSize; + // For historical reasons mMixBuffer is int16_t[], but mFrameSize can be odd (such as 1) + mMixBuffer = new int16_t[(normalBufferSize + 1) >> 1]; + memset(mMixBuffer, 0, normalBufferSize); // force reconfiguration of effect chains and engines to take new buffer size and audio // parameters into account @@ -1839,7 +1899,7 @@ void AudioFlinger::PlaybackThread::threadLoop_removeTracks( const Vector< sp<Track> >& tracksToRemove) { size_t count = tracksToRemove.size(); - if (count) { + if (count > 0) { for (size_t i = 0 ; i < count ; i++) { const sp<Track>& track = tracksToRemove.itemAt(i); if (!track->isOutputTrack()) { @@ -1915,7 +1975,7 @@ ssize_t AudioFlinger::PlaybackThread::threadLoop_write() // otherwise use the HAL / AudioStreamOut directly } else { // Direct output and offload threads - size_t offset = (mCurrentWriteLength - mBytesRemaining) / sizeof(int16_t); + size_t offset = (mCurrentWriteLength - mBytesRemaining); if (mUseAsyncWrite) { ALOGW_IF(mWriteAckSequence & 1, "threadLoop_write(): out of sequence write request"); mWriteAckSequence += 2; @@ -1926,7 +1986,7 @@ ssize_t AudioFlinger::PlaybackThread::threadLoop_write() // FIXME We should have an implementation of timestamps for direct output threads. // They are used e.g for multichannel PCM playback over HDMI. bytesWritten = mOutput->stream->write(mOutput->stream, - mMixBuffer + offset, mBytesRemaining); + (char *)mMixBuffer + offset, mBytesRemaining); if (mUseAsyncWrite && ((bytesWritten < 0) || (bytesWritten == (ssize_t)mBytesRemaining))) { // do not wait for async callback in case of error of full write @@ -2346,20 +2406,20 @@ bool AudioFlinger::PlaybackThread::threadLoop() (mMixerStatus == MIXER_DRAIN_ALL)) { threadLoop_drain(); } -if (mType == MIXER) { - // write blocked detection - nsecs_t now = systemTime(); - nsecs_t delta = now - mLastWriteTime; - if (!mStandby && delta > maxPeriod) { - mNumDelayedWrites++; - if ((now - lastWarning) > kWarningThrottleNs) { - ATRACE_NAME("underrun"); - ALOGW("write blocked for %llu msecs, %d delayed writes, thread %p", - ns2ms(delta), mNumDelayedWrites, this); - lastWarning = now; + if (mType == MIXER) { + // write blocked detection + nsecs_t now = systemTime(); + nsecs_t delta = now - mLastWriteTime; + if (!mStandby && delta > maxPeriod) { + mNumDelayedWrites++; + if ((now - lastWarning) > kWarningThrottleNs) { + ATRACE_NAME("underrun"); + ALOGW("write blocked for %llu msecs, %d delayed writes, thread %p", + ns2ms(delta), mNumDelayedWrites, this); + lastWarning = now; + } } } -} } else { usleep(sleepTime); @@ -2407,7 +2467,7 @@ if (mType == MIXER) { void AudioFlinger::PlaybackThread::removeTracks_l(const Vector< sp<Track> >& tracksToRemove) { size_t count = tracksToRemove.size(); - if (count) { + if (count > 0) { for (size_t i=0 ; i<count ; i++) { const sp<Track>& track = tracksToRemove.itemAt(i); mActiveTracks.remove(track); @@ -2711,12 +2771,6 @@ void AudioFlinger::MixerThread::threadLoop_standby() PlaybackThread::threadLoop_standby(); } -// Empty implementation for standard mixer -// Overridden for offloaded playback -void AudioFlinger::PlaybackThread::flushOutput_l() -{ -} - bool AudioFlinger::PlaybackThread::waitingAsyncCallback_l() { return false; @@ -2748,6 +2802,12 @@ void AudioFlinger::PlaybackThread::threadLoop_standby() } } +void AudioFlinger::PlaybackThread::onAddNewTrack_l() +{ + ALOGV("signal playback thread"); + broadcast_l(); +} + void AudioFlinger::MixerThread::threadLoop_mix() { // obtain the presentation timestamp of the next output buffer @@ -2800,7 +2860,7 @@ void AudioFlinger::MixerThread::threadLoop_sleepTime() sleepTime = idleSleepTime; } } else if (mBytesWritten != 0 || (mMixerStatus == MIXER_TRACKS_ENABLED)) { - memset (mMixBuffer, 0, mixBufferSize); + memset(mMixBuffer, 0, mixBufferSize); sleepTime = 0; ALOGV_IF(mBytesWritten == 0 && (mMixerStatus == MIXER_TRACKS_ENABLED), "anticipated start"); @@ -3025,12 +3085,14 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::MixerThread::prepareTrac // +1 for rounding and +1 for additional sample needed for interpolation desiredFrames = (mNormalFrameCount * sr) / mSampleRate + 1 + 1; // add frames already consumed but not yet released by the resampler - // because cblk->framesReady() will include these frames + // because mAudioTrackServerProxy->framesReady() will include these frames desiredFrames += mAudioMixer->getUnreleasedFrames(track->name()); +#if 0 // the minimum track buffer size is normally twice the number of frames necessary // to fill one buffer and the resampler should not leave more than one buffer worth // of unreleased frames after each pass, but just in case... ALOG_ASSERT(desiredFrames <= cblk->frameCount_); +#endif } uint32_t minFrames = 1; if ((track->sharedBuffer() == 0) && !track->isStopped() && !track->isPausing() && @@ -3356,6 +3418,7 @@ bool AudioFlinger::MixerThread::checkForNewParameters_l() if ((audio_format_t) value != AUDIO_FORMAT_PCM_16_BIT) { status = BAD_VALUE; } else { + // no need to save value, since it's constant reconfig = true; } } @@ -3363,6 +3426,7 @@ bool AudioFlinger::MixerThread::checkForNewParameters_l() if ((audio_channel_mask_t) value != AUDIO_CHANNEL_OUT_STEREO) { status = BAD_VALUE; } else { + // no need to save value, since it's constant reconfig = true; } } @@ -3466,9 +3530,7 @@ void AudioFlinger::MixerThread::dumpInternals(int fd, const Vector<String16>& ar PlaybackThread::dumpInternals(fd, args); - snprintf(buffer, SIZE, "AudioMixer tracks: %08x\n", mAudioMixer->trackNames()); - result.append(buffer); - write(fd, result.string(), result.size()); + fdprintf(fd, " AudioMixer tracks: 0x%08x\n", mAudioMixer->trackNames()); // Make a non-atomic copy of fast mixer dump state so it won't change underneath us const FastMixerDumpState copy(mFastMixerDumpState); @@ -3722,14 +3784,14 @@ void AudioFlinger::DirectOutputThread::threadLoop_sleepTime() } // getTrackName_l() must be called with ThreadBase::mLock held -int AudioFlinger::DirectOutputThread::getTrackName_l(audio_channel_mask_t channelMask, - int sessionId) +int AudioFlinger::DirectOutputThread::getTrackName_l(audio_channel_mask_t channelMask __unused, + int sessionId __unused) { return 0; } // deleteTrackName_l() must be called with ThreadBase::mLock held -void AudioFlinger::DirectOutputThread::deleteTrackName_l(int name) +void AudioFlinger::DirectOutputThread::deleteTrackName_l(int name __unused) { } @@ -3982,6 +4044,17 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::OffloadThread::prepareTr sp<Track> l = mLatestActiveTrack.promote(); bool last = l.get() == track; + if (track->isInvalid()) { + ALOGW("An invalidated track shouldn't be in active list"); + tracksToRemove->add(track); + continue; + } + + if (track->mState == TrackBase::IDLE) { + ALOGW("An idle track shouldn't be in active list"); + continue; + } + if (track->isPausing()) { track->setPaused(); if (last) { @@ -4000,6 +4073,11 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::OffloadThread::prepareTr mBytesRemaining = 0; // stop writing } tracksToRemove->add(track); + } else if (track->isFlushPending()) { + track->flushAck(); + if (last) { + mFlushPending = true; + } } else if (track->framesReady() && track->isReady() && !track->isPaused() && !track->isTerminated() && !track->isStopping_2()) { ALOGVV("OffloadThread: track %d s=%08x [OK]", track->name(), cblk->mServer); @@ -4049,7 +4127,6 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::OffloadThread::prepareTr // seek when resuming. if (previousTrack->sessionId() != track->sessionId()) { previousTrack->invalidate(); - mFlushPending = true; } } } @@ -4125,9 +4202,6 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::OffloadThread::prepareTr // if resume is received before pause is executed. if (!mStandby && (doHwPause || (mFlushPending && !mHwPaused && (count != 0)))) { mOutput->stream->pause(mOutput->stream); - if (!doHwPause) { - doHwResume = true; - } } if (mFlushPending) { flushHw_l(); @@ -4143,11 +4217,6 @@ AudioFlinger::PlaybackThread::mixer_state AudioFlinger::OffloadThread::prepareTr return mixerStatus; } -void AudioFlinger::OffloadThread::flushOutput_l() -{ - mFlushPending = true; -} - // must be called with thread mutex locked bool AudioFlinger::OffloadThread::waitingAsyncCallback_l() { @@ -4162,15 +4231,15 @@ bool AudioFlinger::OffloadThread::waitingAsyncCallback_l() // must be called with thread mutex locked bool AudioFlinger::OffloadThread::shouldStandby_l() { - bool TrackPaused = false; + bool trackPaused = false; // do not put the HAL in standby when paused. AwesomePlayer clear the offloaded AudioTrack // after a timeout and we will enter standby then. if (mTracks.size() > 0) { - TrackPaused = mTracks[mTracks.size() - 1]->isPaused(); + trackPaused = mTracks[mTracks.size() - 1]->isPaused(); } - return !mStandby && !TrackPaused; + return !mStandby && !trackPaused; } @@ -4188,6 +4257,8 @@ void AudioFlinger::OffloadThread::flushHw_l() mBytesRemaining = 0; mPausedWriteLength = 0; mPausedBytesRemaining = 0; + mHwPaused = false; + if (mUseAsyncWrite) { // discard any pending drain or write ack by incrementing sequence mWriteAckSequence = (mWriteAckSequence + 2) & ~1; @@ -4198,6 +4269,18 @@ void AudioFlinger::OffloadThread::flushHw_l() } } +void AudioFlinger::OffloadThread::onAddNewTrack_l() +{ + sp<Track> previousTrack = mPreviousTrack.promote(); + sp<Track> latestTrack = mLatestActiveTrack.promote(); + + if (previousTrack != 0 && latestTrack != 0 && + (previousTrack->sessionId() != latestTrack->sessionId())) { + mFlushPending = true; + } + PlaybackThread::onAddNewTrack_l(); +} + // ---------------------------------------------------------------------------- AudioFlinger::DuplicatingThread::DuplicatingThread(const sp<AudioFlinger>& audioFlinger, @@ -4377,8 +4460,10 @@ AudioFlinger::RecordThread::RecordThread(const sp<AudioFlinger>& audioFlinger, #endif ) : ThreadBase(audioFlinger, id, outDevice, inDevice, RECORD), - mInput(input), mResampler(NULL), mRsmpOutBuffer(NULL), mRsmpInBuffer(NULL), - // mRsmpInIndex and mBufferSize set by readInputParameters() + mInput(input), mActiveTracksGen(0), mResampler(NULL), mRsmpOutBuffer(NULL), mRsmpInBuffer(NULL), + // mRsmpInFrames, mRsmpInFramesP2, mRsmpInUnrel, mRsmpInFront, and mRsmpInRear + // are set by readInputParameters() + // mRsmpInIndex LEGACY mReqChannelCount(popcount(channelMask)), mReqSampleRate(sampleRate) // mBytesRead is only meaningful while active, and so is cleared in start() @@ -4388,6 +4473,7 @@ AudioFlinger::RecordThread::RecordThread(const sp<AudioFlinger>& audioFlinger, #endif { snprintf(mName, kNameLength, "AudioIn_%X", id); + mNBLogWriter = audioFlinger->newWriter_l(kLogSize, mName); readInputParameters(); } @@ -4395,6 +4481,7 @@ AudioFlinger::RecordThread::RecordThread(const sp<AudioFlinger>& audioFlinger, AudioFlinger::RecordThread::~RecordThread() { + mAudioFlinger->unregisterWriter(mNBLogWriter); delete[] mRsmpInBuffer; delete mResampler; delete[] mRsmpOutBuffer; @@ -4405,230 +4492,323 @@ void AudioFlinger::RecordThread::onFirstRef() run(mName, PRIORITY_URGENT_AUDIO); } -status_t AudioFlinger::RecordThread::readyToRun() -{ - status_t status = initCheck(); - ALOGW_IF(status != NO_ERROR,"RecordThread %p could not initialize", this); - return status; -} - bool AudioFlinger::RecordThread::threadLoop() { - AudioBufferProvider::Buffer buffer; - sp<RecordTrack> activeTrack; - Vector< sp<EffectChain> > effectChains; - nsecs_t lastWarning = 0; inputStandBy(); - { - Mutex::Autolock _l(mLock); - activeTrack = mActiveTrack; - acquireWakeLock_l(activeTrack != 0 ? activeTrack->uid() : -1); - } // used to verify we've read at least once before evaluating how many bytes were read bool readOnce = false; + // used to request a deferred sleep, to be executed later while mutex is unlocked + bool doSleep = false; + +reacquire_wakelock: + sp<RecordTrack> activeTrack; + int activeTracksGen; + { + Mutex::Autolock _l(mLock); + size_t size = mActiveTracks.size(); + activeTracksGen = mActiveTracksGen; + if (size > 0) { + // FIXME an arbitrary choice + activeTrack = mActiveTracks[0]; + acquireWakeLock_l(activeTrack->uid()); + if (size > 1) { + SortedVector<int> tmp; + for (size_t i = 0; i < size; i++) { + tmp.add(mActiveTracks[i]->uid()); + } + updateWakeLockUids_l(tmp); + } + } else { + acquireWakeLock_l(-1); + } + } + // start recording - while (!exitPending()) { + for (;;) { + TrackBase::track_state activeTrackState; + Vector< sp<EffectChain> > effectChains; - processConfigEvents(); + // sleep with mutex unlocked + if (doSleep) { + doSleep = false; + usleep(kRecordThreadSleepUs); + } { // scope for mLock Mutex::Autolock _l(mLock); - checkForNewParameters_l(); - if (mActiveTrack != 0 && activeTrack != mActiveTrack) { - SortedVector<int> tmp; - tmp.add(mActiveTrack->uid()); - updateWakeLockUids_l(tmp); - } - activeTrack = mActiveTrack; - if (mActiveTrack == 0 && mConfigEvents.isEmpty()) { - standby(); - if (exitPending()) { - break; - } + processConfigEvents_l(); + // return value 'reconfig' is currently unused + bool reconfig = checkForNewParameters_l(); + // check exitPending here because checkForNewParameters_l() and + // checkForNewParameters_l() can temporarily release mLock + if (exitPending()) { + break; + } + + // if no active track(s), then standby and release wakelock + size_t size = mActiveTracks.size(); + if (size == 0) { + standbyIfNotAlreadyInStandby(); + // exitPending() can't become true here releaseWakeLock_l(); ALOGV("RecordThread: loop stopping"); // go to sleep mWaitWorkCV.wait(mLock); ALOGV("RecordThread: loop starting"); - acquireWakeLock_l(mActiveTrack != 0 ? mActiveTrack->uid() : -1); + goto reacquire_wakelock; + } + + if (mActiveTracksGen != activeTracksGen) { + activeTracksGen = mActiveTracksGen; + SortedVector<int> tmp; + for (size_t i = 0; i < size; i++) { + tmp.add(mActiveTracks[i]->uid()); + } + updateWakeLockUids_l(tmp); + // FIXME an arbitrary choice + activeTrack = mActiveTracks[0]; + } + + if (activeTrack->isTerminated()) { + removeTrack_l(activeTrack); + mActiveTracks.remove(activeTrack); + mActiveTracksGen++; continue; } - if (mActiveTrack != 0) { - if (mActiveTrack->isTerminated()) { - removeTrack_l(mActiveTrack); - mActiveTrack.clear(); - } else if (mActiveTrack->mState == TrackBase::PAUSING) { - standby(); - mActiveTrack.clear(); + + activeTrackState = activeTrack->mState; + switch (activeTrackState) { + case TrackBase::PAUSING: + standbyIfNotAlreadyInStandby(); + mActiveTracks.remove(activeTrack); + mActiveTracksGen++; + mStartStopCond.broadcast(); + doSleep = true; + continue; + + case TrackBase::RESUMING: + mStandby = false; + if (mReqChannelCount != activeTrack->channelCount()) { + mActiveTracks.remove(activeTrack); + mActiveTracksGen++; mStartStopCond.broadcast(); - } else if (mActiveTrack->mState == TrackBase::RESUMING) { - if (mReqChannelCount != mActiveTrack->channelCount()) { - mActiveTrack.clear(); - mStartStopCond.broadcast(); - } else if (readOnce) { - // record start succeeds only if first read from audio input - // succeeds - if (mBytesRead >= 0) { - mActiveTrack->mState = TrackBase::ACTIVE; - } else { - mActiveTrack.clear(); - } - mStartStopCond.broadcast(); + continue; + } + if (readOnce) { + mStartStopCond.broadcast(); + // record start succeeds only if first read from audio input succeeds + if (mBytesRead < 0) { + mActiveTracks.remove(activeTrack); + mActiveTracksGen++; + continue; } - mStandby = false; + activeTrack->mState = TrackBase::ACTIVE; } + break; + + case TrackBase::ACTIVE: + break; + + case TrackBase::IDLE: + doSleep = true; + continue; + + default: + LOG_FATAL("Unexpected activeTrackState %d", activeTrackState); } lockEffectChains_l(effectChains); } - if (mActiveTrack != 0) { - if (mActiveTrack->mState != TrackBase::ACTIVE && - mActiveTrack->mState != TrackBase::RESUMING) { - unlockEffectChains(effectChains); - usleep(kRecordThreadSleepUs); - continue; - } - for (size_t i = 0; i < effectChains.size(); i ++) { - effectChains[i]->process_l(); - } + // thread mutex is now unlocked, mActiveTracks unknown, activeTrack != 0, kept, immutable + // activeTrack->mState unknown, activeTrackState immutable and is ACTIVE or RESUMING - buffer.frameCount = mFrameCount; - status_t status = mActiveTrack->getNextBuffer(&buffer); - if (status == NO_ERROR) { - readOnce = true; - size_t framesOut = buffer.frameCount; - if (mResampler == NULL) { - // no resampling - while (framesOut) { - size_t framesIn = mFrameCount - mRsmpInIndex; - if (framesIn) { - int8_t *src = (int8_t *)mRsmpInBuffer + mRsmpInIndex * mFrameSize; - int8_t *dst = buffer.i8 + (buffer.frameCount - framesOut) * - mActiveTrack->mFrameSize; - if (framesIn > framesOut) - framesIn = framesOut; - mRsmpInIndex += framesIn; - framesOut -= framesIn; - if (mChannelCount == mReqChannelCount) { - memcpy(dst, src, framesIn * mFrameSize); - } else { - if (mChannelCount == 1) { - upmix_to_stereo_i16_from_mono_i16((int16_t *)dst, - (int16_t *)src, framesIn); - } else { - downmix_to_mono_i16_from_stereo_i16((int16_t *)dst, - (int16_t *)src, framesIn); - } - } + for (size_t i = 0; i < effectChains.size(); i ++) { + // thread mutex is not locked, but effect chain is locked + effectChains[i]->process_l(); + } + + AudioBufferProvider::Buffer buffer; + buffer.frameCount = mFrameCount; + status_t status = activeTrack->getNextBuffer(&buffer); + if (status == NO_ERROR) { + readOnce = true; + size_t framesOut = buffer.frameCount; + if (mResampler == NULL) { + // no resampling + while (framesOut) { + size_t framesIn = mFrameCount - mRsmpInIndex; + if (framesIn > 0) { + int8_t *src = (int8_t *)mRsmpInBuffer + mRsmpInIndex * mFrameSize; + int8_t *dst = buffer.i8 + (buffer.frameCount - framesOut) * + activeTrack->mFrameSize; + if (framesIn > framesOut) { + framesIn = framesOut; } - if (framesOut && mFrameCount == mRsmpInIndex) { - void *readInto; - if (framesOut == mFrameCount && mChannelCount == mReqChannelCount) { - readInto = buffer.raw; - framesOut = 0; + mRsmpInIndex += framesIn; + framesOut -= framesIn; + if (mChannelCount == mReqChannelCount) { + memcpy(dst, src, framesIn * mFrameSize); + } else { + if (mChannelCount == 1) { + upmix_to_stereo_i16_from_mono_i16((int16_t *)dst, + (int16_t *)src, framesIn); } else { - readInto = mRsmpInBuffer; - mRsmpInIndex = 0; + downmix_to_mono_i16_from_stereo_i16((int16_t *)dst, + (int16_t *)src, framesIn); } - mBytesRead = mInput->stream->read(mInput->stream, readInto, - mBufferSize); - if (mBytesRead <= 0) { - if ((mBytesRead < 0) && (mActiveTrack->mState == TrackBase::ACTIVE)) - { - ALOGE("Error reading audio input"); - // Force input into standby so that it tries to - // recover at next read attempt - inputStandBy(); - usleep(kRecordThreadSleepUs); - } - mRsmpInIndex = mFrameCount; - framesOut = 0; - buffer.frameCount = 0; + } + } + if (framesOut > 0 && mFrameCount == mRsmpInIndex) { + void *readInto; + if (framesOut == mFrameCount && mChannelCount == mReqChannelCount) { + readInto = buffer.raw; + framesOut = 0; + } else { + readInto = mRsmpInBuffer; + mRsmpInIndex = 0; + } + mBytesRead = mInput->stream->read(mInput->stream, readInto, mBufferSize); + if (mBytesRead <= 0) { + // TODO: verify that it's benign to use a stale track state + if ((mBytesRead < 0) && (activeTrackState == TrackBase::ACTIVE)) + { + ALOGE("Error reading audio input"); + // Force input into standby so that it tries to + // recover at next read attempt + inputStandBy(); + doSleep = true; } + mRsmpInIndex = mFrameCount; + framesOut = 0; + buffer.frameCount = 0; + } #ifdef TEE_SINK - else if (mTeeSink != 0) { - (void) mTeeSink->write(readInto, - mBytesRead >> Format_frameBitShift(mTeeSink->format())); - } -#endif + else if (mTeeSink != 0) { + (void) mTeeSink->write(readInto, + mBytesRead >> Format_frameBitShift(mTeeSink->format())); } +#endif } - } else { - // resampling - - // resampler accumulates, but we only have one source track - memset(mRsmpOutBuffer, 0, framesOut * FCC_2 * sizeof(int32_t)); - // alter output frame count as if we were expecting stereo samples - if (mChannelCount == 1 && mReqChannelCount == 1) { - framesOut >>= 1; + } + } else { + // resampling + + // avoid busy-waiting if client doesn't keep up + bool madeProgress = false; + + // keep mRsmpInBuffer full so resampler always has sufficient input + for (;;) { + int32_t rear = mRsmpInRear; + ssize_t filled = rear - mRsmpInFront; + ALOG_ASSERT(0 <= filled && (size_t) filled <= mRsmpInFramesP2); + // exit once there is enough data in buffer for resampler + if ((size_t) filled >= mRsmpInFrames) { + break; } - mResampler->resample(mRsmpOutBuffer, framesOut, - this /* AudioBufferProvider* */); - // ditherAndClamp() works as long as all buffers returned by - // mActiveTrack->getNextBuffer() are 32 bit aligned which should be always true. - if (mChannelCount == 2 && mReqChannelCount == 1) { - // temporarily type pun mRsmpOutBuffer from Q19.12 to int16_t - ditherAndClamp(mRsmpOutBuffer, mRsmpOutBuffer, framesOut); - // the resampler always outputs stereo samples: - // do post stereo to mono conversion - downmix_to_mono_i16_from_stereo_i16(buffer.i16, (int16_t *)mRsmpOutBuffer, - framesOut); - } else { - ditherAndClamp((int32_t *)buffer.raw, mRsmpOutBuffer, framesOut); + size_t avail = mRsmpInFramesP2 - filled; + // Only try to read full HAL buffers. + // But if the HAL read returns a partial buffer, use it. + if (avail < mFrameCount) { + ALOGE("insufficient space to read: avail %d < mFrameCount %d", + avail, mFrameCount); + break; + } + // If 'avail' is non-contiguous, first read past the nominal end of buffer, then + // copy to the right place. Permitted because mRsmpInBuffer was over-allocated. + rear &= mRsmpInFramesP2 - 1; + mBytesRead = mInput->stream->read(mInput->stream, + &mRsmpInBuffer[rear * mChannelCount], mBufferSize); + if (mBytesRead <= 0) { + ALOGE("read failed: mBytesRead=%d < %u", mBytesRead, mBufferSize); + break; } - // now done with mRsmpOutBuffer + ALOG_ASSERT((size_t) mBytesRead <= mBufferSize); + size_t framesRead = mBytesRead / mFrameSize; + ALOG_ASSERT(framesRead > 0); + madeProgress = true; + // If 'avail' was non-contiguous, we now correct for reading past end of buffer. + size_t part1 = mRsmpInFramesP2 - rear; + if (framesRead > part1) { + memcpy(mRsmpInBuffer, &mRsmpInBuffer[mRsmpInFramesP2 * mChannelCount], + (framesRead - part1) * mFrameSize); + } + mRsmpInRear += framesRead; + } + if (!madeProgress) { + ALOGV("Did not make progress"); + usleep(((mFrameCount * 1000) / mSampleRate) * 1000); } - if (mFramestoDrop == 0) { - mActiveTrack->releaseBuffer(&buffer); + + // resampler accumulates, but we only have one source track + memset(mRsmpOutBuffer, 0, framesOut * FCC_2 * sizeof(int32_t)); + mResampler->resample(mRsmpOutBuffer, framesOut, + this /* AudioBufferProvider* */); + // ditherAndClamp() works as long as all buffers returned by + // activeTrack->getNextBuffer() are 32 bit aligned which should be always true. + if (mReqChannelCount == 1) { + // temporarily type pun mRsmpOutBuffer from Q19.12 to int16_t + ditherAndClamp(mRsmpOutBuffer, mRsmpOutBuffer, framesOut); + // the resampler always outputs stereo samples: + // do post stereo to mono conversion + downmix_to_mono_i16_from_stereo_i16(buffer.i16, (int16_t *)mRsmpOutBuffer, + framesOut); } else { - if (mFramestoDrop > 0) { - mFramestoDrop -= buffer.frameCount; - if (mFramestoDrop <= 0) { - clearSyncStartEvent(); - } - } else { - mFramestoDrop += buffer.frameCount; - if (mFramestoDrop >= 0 || mSyncStartEvent == 0 || - mSyncStartEvent->isCancelled()) { - ALOGW("Synced record %s, session %d, trigger session %d", - (mFramestoDrop >= 0) ? "timed out" : "cancelled", - mActiveTrack->sessionId(), - (mSyncStartEvent != 0) ? mSyncStartEvent->triggerSession() : 0); - clearSyncStartEvent(); - } - } + ditherAndClamp((int32_t *)buffer.raw, mRsmpOutBuffer, framesOut); } - mActiveTrack->clearOverflow(); + // now done with mRsmpOutBuffer + } - // client isn't retrieving buffers fast enough - else { - if (!mActiveTrack->setOverflow()) { - nsecs_t now = systemTime(); - if ((now - lastWarning) > kWarningThrottleNs) { - ALOGW("RecordThread: buffer overflow"); - lastWarning = now; + if (mFramestoDrop == 0) { + activeTrack->releaseBuffer(&buffer); + } else { + if (mFramestoDrop > 0) { + mFramestoDrop -= buffer.frameCount; + if (mFramestoDrop <= 0) { + clearSyncStartEvent(); + } + } else { + mFramestoDrop += buffer.frameCount; + if (mFramestoDrop >= 0 || mSyncStartEvent == 0 || + mSyncStartEvent->isCancelled()) { + ALOGW("Synced record %s, session %d, trigger session %d", + (mFramestoDrop >= 0) ? "timed out" : "cancelled", + activeTrack->sessionId(), + (mSyncStartEvent != 0) ? mSyncStartEvent->triggerSession() : 0); + clearSyncStartEvent(); } } - // Release the processor for a while before asking for a new buffer. - // This will give the application more chance to read from the buffer and - // clear the overflow. - usleep(kRecordThreadSleepUs); } + activeTrack->clearOverflow(); + } + // client isn't retrieving buffers fast enough + else { + if (!activeTrack->setOverflow()) { + nsecs_t now = systemTime(); + if ((now - lastWarning) > kWarningThrottleNs) { + ALOGW("RecordThread: buffer overflow"); + lastWarning = now; + } + } + // Release the processor for a while before asking for a new buffer. + // This will give the application more chance to read from the buffer and + // clear the overflow. + doSleep = true; } + // enable changes in effect chain unlockEffectChains(effectChains); - effectChains.clear(); + // effectChains doesn't need to be cleared, since it is cleared by destructor at scope end } - standby(); + standbyIfNotAlreadyInStandby(); { Mutex::Autolock _l(mLock); @@ -4636,7 +4816,8 @@ bool AudioFlinger::RecordThread::threadLoop() sp<RecordTrack> track = mTracks[i]; track->invalidate(); } - mActiveTrack.clear(); + mActiveTracks.clear(); + mActiveTracksGen++; mStartStopCond.broadcast(); } @@ -4646,7 +4827,7 @@ bool AudioFlinger::RecordThread::threadLoop() return false; } -void AudioFlinger::RecordThread::standby() +void AudioFlinger::RecordThread::standbyIfNotAlreadyInStandby() { if (!mStandby) { inputStandBy(); @@ -4659,18 +4840,19 @@ void AudioFlinger::RecordThread::inputStandBy() mInput->stream->common.standby(&mInput->stream->common); } -sp<AudioFlinger::RecordThread::RecordTrack> AudioFlinger::RecordThread::createRecordTrack_l( +sp<AudioFlinger::RecordThread::RecordTrack> AudioFlinger::RecordThread::createRecordTrack_l( const sp<AudioFlinger::Client>& client, uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *pFrameCount, int sessionId, int uid, IAudioFlinger::track_flags_t *flags, pid_t tid, status_t *status) { + size_t frameCount = *pFrameCount; sp<RecordTrack> track; status_t lStatus; @@ -4679,6 +4861,7 @@ sp<AudioFlinger::RecordThread::RecordTrack> AudioFlinger::RecordThread::createR ALOGE("createRecordTrack_l() audio driver not initialized"); goto Exit; } + // client expresses a preference for FAST, but we get the final say if (*flags & IAudioFlinger::TRACK_FAST) { if ( @@ -4729,6 +4912,7 @@ sp<AudioFlinger::RecordThread::RecordTrack> AudioFlinger::RecordThread::createR } } } + *pFrameCount = frameCount; // FIXME use flags and tid similar to createTrack_l() @@ -4738,10 +4922,10 @@ sp<AudioFlinger::RecordThread::RecordTrack> AudioFlinger::RecordThread::createR track = new RecordTrack(this, client, sampleRate, format, channelMask, frameCount, sessionId, uid); - if (track->getCblk() == 0) { - ALOGE("createRecordTrack_l() no control block"); - lStatus = NO_MEMORY; - track.clear(); + lStatus = track->initCheck(); + if (lStatus != NO_ERROR) { + ALOGE("createRecordTrack_l() initCheck failed %d; no control block?", lStatus); + // track must be cleared from the caller as the caller has the AF lock goto Exit; } mTracks.add(track); @@ -4762,9 +4946,7 @@ sp<AudioFlinger::RecordThread::RecordTrack> AudioFlinger::RecordThread::createR lStatus = NO_ERROR; Exit: - if (status) { - *status = lStatus; - } + *status = lStatus; return track; } @@ -4795,43 +4977,57 @@ status_t AudioFlinger::RecordThread::start(RecordThread::RecordTrack* recordTrac } { + // This section is a rendezvous between binder thread executing start() and RecordThread AutoMutex lock(mLock); - if (mActiveTrack != 0) { - if (recordTrack != mActiveTrack.get()) { + if (mActiveTracks.size() > 0) { + // FIXME does not work for multiple active tracks + if (mActiveTracks.indexOf(recordTrack) != 0) { status = -EBUSY; - } else if (mActiveTrack->mState == TrackBase::PAUSING) { - mActiveTrack->mState = TrackBase::ACTIVE; + } else if (recordTrack->mState == TrackBase::PAUSING) { + recordTrack->mState = TrackBase::ACTIVE; } return status; } + // FIXME why? already set in constructor, 'STARTING_1' would be more accurate recordTrack->mState = TrackBase::IDLE; - mActiveTrack = recordTrack; + mActiveTracks.add(recordTrack); + mActiveTracksGen++; mLock.unlock(); status_t status = AudioSystem::startInput(mId); mLock.lock(); + // FIXME should verify that mActiveTrack is still == recordTrack if (status != NO_ERROR) { - mActiveTrack.clear(); + mActiveTracks.remove(recordTrack); + mActiveTracksGen++; clearSyncStartEvent(); return status; } + // FIXME LEGACY mRsmpInIndex = mFrameCount; + mRsmpInFront = 0; + mRsmpInRear = 0; + mRsmpInUnrel = 0; mBytesRead = 0; if (mResampler != NULL) { mResampler->reset(); } - mActiveTrack->mState = TrackBase::RESUMING; + // FIXME hijacking a playback track state name which was intended for start after pause; + // here 'STARTING_2' would be more accurate + recordTrack->mState = TrackBase::RESUMING; // signal thread to start ALOGV("Signal record thread"); mWaitWorkCV.broadcast(); // do not wait for mStartStopCond if exiting if (exitPending()) { - mActiveTrack.clear(); + mActiveTracks.remove(recordTrack); + mActiveTracksGen++; status = INVALID_OPERATION; goto startError; } + // FIXME incorrect usage of wait: no explicit predicate or loop mStartStopCond.wait(mLock); - if (mActiveTrack == 0) { + if (mActiveTracks.indexOf(recordTrack) < 0) { ALOGV("Record failed to start"); status = BAD_VALUE; goto startError; @@ -4877,29 +5073,31 @@ void AudioFlinger::RecordThread::handleSyncStartEvent(const sp<SyncEvent>& event bool AudioFlinger::RecordThread::stop(RecordThread::RecordTrack* recordTrack) { ALOGV("RecordThread::stop"); AutoMutex _l(mLock); - if (recordTrack != mActiveTrack.get() || recordTrack->mState == TrackBase::PAUSING) { + if (mActiveTracks.indexOf(recordTrack) != 0 || recordTrack->mState == TrackBase::PAUSING) { return false; } + // note that threadLoop may still be processing the track at this point [without lock] recordTrack->mState = TrackBase::PAUSING; // do not wait for mStartStopCond if exiting if (exitPending()) { return true; } + // FIXME incorrect usage of wait: no explicit predicate or loop mStartStopCond.wait(mLock); - // if we have been restarted, recordTrack == mActiveTrack.get() here - if (exitPending() || recordTrack != mActiveTrack.get()) { + // if we have been restarted, recordTrack is in mActiveTracks here + if (exitPending() || mActiveTracks.indexOf(recordTrack) != 0) { ALOGV("Record stopped OK"); return true; } return false; } -bool AudioFlinger::RecordThread::isValidSyncEvent(const sp<SyncEvent>& event) const +bool AudioFlinger::RecordThread::isValidSyncEvent(const sp<SyncEvent>& event __unused) const { return false; } -status_t AudioFlinger::RecordThread::setSyncEvent(const sp<SyncEvent>& event) +status_t AudioFlinger::RecordThread::setSyncEvent(const sp<SyncEvent>& event __unused) { #if 0 // This branch is currently dead code, but is preserved in case it will be needed in future if (!isValidSyncEvent(event)) { @@ -4930,7 +5128,7 @@ void AudioFlinger::RecordThread::destroyTrack_l(const sp<RecordTrack>& track) track->terminate(); track->mState = TrackBase::STOPPED; // active tracks are removed by threadLoop() - if (mActiveTrack != track) { + if (mActiveTracks.indexOf(track) < 0) { removeTrack_l(track); } } @@ -4950,104 +5148,110 @@ void AudioFlinger::RecordThread::dump(int fd, const Vector<String16>& args) void AudioFlinger::RecordThread::dumpInternals(int fd, const Vector<String16>& args) { - const size_t SIZE = 256; - char buffer[SIZE]; - String8 result; + fdprintf(fd, "\nInput thread %p:\n", this); - snprintf(buffer, SIZE, "\nInput thread %p internals\n", this); - result.append(buffer); - - if (mActiveTrack != 0) { - snprintf(buffer, SIZE, "In index: %zu\n", mRsmpInIndex); - result.append(buffer); - snprintf(buffer, SIZE, "Buffer size: %zu bytes\n", mBufferSize); - result.append(buffer); - snprintf(buffer, SIZE, "Resampling: %d\n", (mResampler != NULL)); - result.append(buffer); - snprintf(buffer, SIZE, "Out channel count: %u\n", mReqChannelCount); - result.append(buffer); - snprintf(buffer, SIZE, "Out sample rate: %u\n", mReqSampleRate); - result.append(buffer); + if (mActiveTracks.size() > 0) { + fdprintf(fd, " In index: %zu\n", mRsmpInIndex); + fdprintf(fd, " Buffer size: %zu bytes\n", mBufferSize); + fdprintf(fd, " Resampling: %d\n", (mResampler != NULL)); + fdprintf(fd, " Out channel count: %u\n", mReqChannelCount); + fdprintf(fd, " Out sample rate: %u\n", mReqSampleRate); } else { - result.append("No active record client\n"); + fdprintf(fd, " No active record client\n"); } - write(fd, result.string(), result.size()); - dumpBase(fd, args); } -void AudioFlinger::RecordThread::dumpTracks(int fd, const Vector<String16>& args) +void AudioFlinger::RecordThread::dumpTracks(int fd, const Vector<String16>& args __unused) { const size_t SIZE = 256; char buffer[SIZE]; String8 result; - snprintf(buffer, SIZE, "Input thread %p tracks\n", this); - result.append(buffer); - RecordTrack::appendDumpHeader(result); - for (size_t i = 0; i < mTracks.size(); ++i) { - sp<RecordTrack> track = mTracks[i]; - if (track != 0) { - track->dump(buffer, SIZE); - result.append(buffer); + size_t numtracks = mTracks.size(); + size_t numactive = mActiveTracks.size(); + size_t numactiveseen = 0; + fdprintf(fd, " %d Tracks", numtracks); + if (numtracks) { + fdprintf(fd, " of which %d are active\n", numactive); + RecordTrack::appendDumpHeader(result); + for (size_t i = 0; i < numtracks ; ++i) { + sp<RecordTrack> track = mTracks[i]; + if (track != 0) { + bool active = mActiveTracks.indexOf(track) >= 0; + if (active) { + numactiveseen++; + } + track->dump(buffer, SIZE, active); + result.append(buffer); + } } + } else { + fdprintf(fd, "\n"); } - if (mActiveTrack != 0) { - snprintf(buffer, SIZE, "\nInput thread %p active tracks\n", this); + if (numactiveseen != numactive) { + snprintf(buffer, SIZE, " The following tracks are in the active list but" + " not in the track list\n"); result.append(buffer); RecordTrack::appendDumpHeader(result); - mActiveTrack->dump(buffer, SIZE); - result.append(buffer); + for (size_t i = 0; i < numactive; ++i) { + sp<RecordTrack> track = mActiveTracks[i]; + if (mTracks.indexOf(track) < 0) { + track->dump(buffer, SIZE, true); + result.append(buffer); + } + } } write(fd, result.string(), result.size()); } // AudioBufferProvider interface -status_t AudioFlinger::RecordThread::getNextBuffer(AudioBufferProvider::Buffer* buffer, int64_t pts) -{ - size_t framesReq = buffer->frameCount; - size_t framesReady = mFrameCount - mRsmpInIndex; - int channelCount; - - if (framesReady == 0) { - mBytesRead = mInput->stream->read(mInput->stream, mRsmpInBuffer, mBufferSize); - if (mBytesRead <= 0) { - if ((mBytesRead < 0) && (mActiveTrack->mState == TrackBase::ACTIVE)) { - ALOGE("RecordThread::getNextBuffer() Error reading audio input"); - // Force input into standby so that it tries to - // recover at next read attempt - inputStandBy(); - usleep(kRecordThreadSleepUs); - } - buffer->raw = NULL; - buffer->frameCount = 0; - return NOT_ENOUGH_DATA; - } - mRsmpInIndex = 0; - framesReady = mFrameCount; - } - - if (framesReq > framesReady) { - framesReq = framesReady; - } - - if (mChannelCount == 1 && mReqChannelCount == 2) { - channelCount = 1; - } else { - channelCount = 2; - } - buffer->raw = mRsmpInBuffer + mRsmpInIndex * channelCount; - buffer->frameCount = framesReq; +status_t AudioFlinger::RecordThread::getNextBuffer(AudioBufferProvider::Buffer* buffer, int64_t pts __unused) +{ + int32_t rear = mRsmpInRear; + int32_t front = mRsmpInFront; + ssize_t filled = rear - front; + ALOG_ASSERT(0 <= filled && (size_t) filled <= mRsmpInFramesP2); + // 'filled' may be non-contiguous, so return only the first contiguous chunk + front &= mRsmpInFramesP2 - 1; + size_t part1 = mRsmpInFramesP2 - front; + if (part1 > (size_t) filled) { + part1 = filled; + } + size_t ask = buffer->frameCount; + ALOG_ASSERT(ask > 0); + if (part1 > ask) { + part1 = ask; + } + if (part1 == 0) { + // Higher-level should keep mRsmpInBuffer full, and not call resampler if empty + ALOGE("RecordThread::getNextBuffer() starved"); + buffer->raw = NULL; + buffer->frameCount = 0; + mRsmpInUnrel = 0; + return NOT_ENOUGH_DATA; + } + + buffer->raw = mRsmpInBuffer + front * mChannelCount; + buffer->frameCount = part1; + mRsmpInUnrel = part1; return NO_ERROR; } // AudioBufferProvider interface void AudioFlinger::RecordThread::releaseBuffer(AudioBufferProvider::Buffer* buffer) { - mRsmpInIndex += buffer->frameCount; + size_t stepCount = buffer->frameCount; + if (stepCount == 0) { + return; + } + ALOG_ASSERT(stepCount <= mRsmpInUnrel); + mRsmpInUnrel -= stepCount; + mRsmpInFront += stepCount; + buffer->raw = NULL; buffer->frameCount = 0; } @@ -5062,7 +5266,7 @@ bool AudioFlinger::RecordThread::checkForNewParameters_l() int value; audio_format_t reqFormat = mFormat; uint32_t reqSamplingRate = mReqSampleRate; - uint32_t reqChannelCount = mReqChannelCount; + audio_channel_mask_t reqChannelMask = audio_channel_in_mask_from_count(mReqChannelCount); if (param.getInt(String8(AudioParameter::keySamplingRate), value) == NO_ERROR) { reqSamplingRate = value; @@ -5077,14 +5281,19 @@ bool AudioFlinger::RecordThread::checkForNewParameters_l() } } if (param.getInt(String8(AudioParameter::keyChannels), value) == NO_ERROR) { - reqChannelCount = popcount(value); - reconfig = true; + audio_channel_mask_t mask = (audio_channel_mask_t) value; + if (mask != AUDIO_CHANNEL_IN_MONO && mask != AUDIO_CHANNEL_IN_STEREO) { + status = BAD_VALUE; + } else { + reqChannelMask = mask; + reconfig = true; + } } if (param.getInt(String8(AudioParameter::keyFrameCount), value) == NO_ERROR) { // do not accept frame count changes if tracks are open as the track buffer // size depends on frame count and correct behavior would not be guaranteed // if frame count is changed after track creation - if (mActiveTrack != 0) { + if (mActiveTracks.size() > 0) { status = INVALID_OPERATION; } else { reconfig = true; @@ -5127,6 +5336,7 @@ bool AudioFlinger::RecordThread::checkForNewParameters_l() } mAudioSource = (audio_source_t)value; } + if (status == NO_ERROR) { status = mInput->stream->common.set_parameters(&mInput->stream->common, keyValuePair.string()); @@ -5143,7 +5353,8 @@ bool AudioFlinger::RecordThread::checkForNewParameters_l() <= (2 * reqSamplingRate)) && popcount(mInput->stream->common.get_channels(&mInput->stream->common)) <= FCC_2 && - (reqChannelCount <= FCC_2)) { + (reqChannelMask == AUDIO_CHANNEL_IN_MONO || + reqChannelMask == AUDIO_CHANNEL_IN_STEREO)) { status = NO_ERROR; } if (status == NO_ERROR) { @@ -5177,9 +5388,9 @@ String8 AudioFlinger::RecordThread::getParameters(const String8& keys) return out_s8; } -void AudioFlinger::RecordThread::audioConfigChanged_l(int event, int param) { +void AudioFlinger::RecordThread::audioConfigChanged_l(int event, int param __unused) { AudioSystem::OutputDescriptor desc; - void *param2 = NULL; + const void *param2 = NULL; switch (event) { case AudioSystem::INPUT_OPENED: @@ -5213,39 +5424,32 @@ void AudioFlinger::RecordThread::readInputParameters() mChannelCount = popcount(mChannelMask); mFormat = mInput->stream->common.get_format(&mInput->stream->common); if (mFormat != AUDIO_FORMAT_PCM_16_BIT) { - ALOGE("HAL format %d not supported; must be AUDIO_FORMAT_PCM_16_BIT", mFormat); + ALOGE("HAL format %#x not supported; must be AUDIO_FORMAT_PCM_16_BIT", mFormat); } mFrameSize = audio_stream_frame_size(&mInput->stream->common); mBufferSize = mInput->stream->common.get_buffer_size(&mInput->stream->common); mFrameCount = mBufferSize / mFrameSize; - mRsmpInBuffer = new int16_t[mFrameCount * mChannelCount]; - - if (mSampleRate != mReqSampleRate && mChannelCount <= FCC_2 && mReqChannelCount <= FCC_2) - { - int channelCount; - // optimization: if mono to mono, use the resampler in stereo to stereo mode to avoid - // stereo to mono post process as the resampler always outputs stereo. - if (mChannelCount == 1 && mReqChannelCount == 2) { - channelCount = 1; - } else { - channelCount = 2; - } - mResampler = AudioResampler::create(16, channelCount, mReqSampleRate); + // With 3 HAL buffers, we can guarantee ability to down-sample the input by ratio of 2:1 to + // 1 full output buffer, regardless of the alignment of the available input. + mRsmpInFrames = mFrameCount * 3; + mRsmpInFramesP2 = roundup(mRsmpInFrames); + // Over-allocate beyond mRsmpInFramesP2 to permit a HAL read past end of buffer + mRsmpInBuffer = new int16_t[(mRsmpInFramesP2 + mFrameCount - 1) * mChannelCount]; + mRsmpInFront = 0; + mRsmpInRear = 0; + mRsmpInUnrel = 0; + + if (mSampleRate != mReqSampleRate && mChannelCount <= FCC_2 && mReqChannelCount <= FCC_2) { + mResampler = AudioResampler::create(16, (int) mChannelCount, mReqSampleRate); mResampler->setSampleRate(mSampleRate); mResampler->setVolume(AudioMixer::UNITY_GAIN, AudioMixer::UNITY_GAIN); + // resampler always outputs stereo mRsmpOutBuffer = new int32_t[mFrameCount * FCC_2]; - - // optmization: if mono to mono, alter input frame count as if we were inputing - // stereo samples - if (mChannelCount == 1 && mReqChannelCount == 1) { - mFrameCount >>= 1; - } - } mRsmpInIndex = mFrameCount; } -unsigned int AudioFlinger::RecordThread::getInputFramesLost() +uint32_t AudioFlinger::RecordThread::getInputFramesLost() { Mutex::Autolock _l(mLock); if (initCheck() != NO_ERROR) { diff --git a/services/audioflinger/Threads.h b/services/audioflinger/Threads.h index a2fb874..999fea3 100644 --- a/services/audioflinger/Threads.h +++ b/services/audioflinger/Threads.h @@ -36,6 +36,8 @@ public: audio_devices_t outDevice, audio_devices_t inDevice, type_t type); virtual ~ThreadBase(); + virtual status_t readyToRun(); + void dumpBase(int fd, const Vector<String16>& args); void dumpEffectChains(int fd, const Vector<String16>& args); @@ -63,7 +65,7 @@ public: class IoConfigEvent : public ConfigEvent { public: IoConfigEvent(int event, int param) : - ConfigEvent(CFG_EVENT_IO), mEvent(event), mParam(event) {} + ConfigEvent(CFG_EVENT_IO), mEvent(event), mParam(param) {} virtual ~IoConfigEvent() {} int event() const { return mEvent; } @@ -141,6 +143,7 @@ public: void sendIoConfigEvent_l(int event, int param = 0); void sendPrioConfigEvent_l(pid_t pid, pid_t tid, int32_t prio); void processConfigEvents(); + void processConfigEvents_l(); // see note at declaration of mStandby, mOutDevice and mInDevice bool standby() const { return mStandby; } @@ -156,7 +159,7 @@ public: int sessionId, effect_descriptor_t *desc, int *enabled, - status_t *status); + status_t *status /*non-NULL*/); void disconnectEffect(const sp< EffectModule>& effect, EffectHandle *handle, bool unpinIfLast); @@ -198,13 +201,13 @@ public: // effect void removeEffect_l(const sp< EffectModule>& effect); // detach all tracks connected to an auxiliary effect - virtual void detachAuxEffect_l(int effectId) {} + virtual void detachAuxEffect_l(int effectId __unused) {} // returns either EFFECT_SESSION if effects on this audio session exist in one // chain, or TRACK_SESSION if tracks on this audio session exist, or both virtual uint32_t hasAudioSession(int sessionId) const = 0; // the value returned by default implementation is not important as the // strategy is only meaningful for PlaybackThread which implements this method - virtual uint32_t getStrategyForSession_l(int sessionId) { return 0; } + virtual uint32_t getStrategyForSession_l(int sessionId __unused) { return 0; } // suspend or restore effect according to the type of effect passed. a NULL // type pointer means suspend all effects in the session @@ -275,6 +278,7 @@ protected: uint32_t mChannelCount; size_t mFrameSize; audio_format_t mFormat; + size_t mBufferSize; // HAL buffer size for read() or write() // Parameter sequence by client: binder thread calling setParameters(): // 1. Lock mLock @@ -303,12 +307,12 @@ protected: Vector<ConfigEvent *> mConfigEvents; // These fields are written and read by thread itself without lock or barrier, - // and read by other threads without lock or barrier via standby() , outDevice() + // and read by other threads without lock or barrier via standby(), outDevice() // and inDevice(). // Because of the absence of a lock or barrier, any other thread that reads // these fields must use the information in isolation, or be prepared to deal // with possibility that it might be inconsistent with other information. - bool mStandby; // Whether thread is currently in standby. + bool mStandby; // Whether thread is currently in standby. audio_devices_t mOutDevice; // output device audio_devices_t mInDevice; // input device audio_source_t mAudioSource; // (see audio.h, audio_source_t) @@ -358,7 +362,6 @@ public: void dump(int fd, const Vector<String16>& args); // Thread virtuals - virtual status_t readyToRun(); virtual bool threadLoop(); // RefBase @@ -391,7 +394,7 @@ protected: virtual bool waitingAsyncCallback(); virtual bool waitingAsyncCallback_l(); virtual bool shouldStandby_l(); - + virtual void onAddNewTrack_l(); // ThreadBase virtuals virtual void preExit(); @@ -419,13 +422,13 @@ public: uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *pFrameCount, const sp<IMemory>& sharedBuffer, int sessionId, IAudioFlinger::track_flags_t *flags, pid_t tid, int uid, - status_t *status); + status_t *status /*non-NULL*/); AudioStreamOut* getOutput() const; AudioStreamOut* clearOutput(); @@ -479,7 +482,6 @@ protected: size_t mNormalFrameCount; // normal mixer and effects int16_t* mMixBuffer; // frame size aligned mix buffer - int8_t* mAllocMixBuffer; // mixer buffer allocation address // suspend count, > 0 means suspended. While suspended, the thread continues to pull from // tracks and mix, but doesn't write to HAL. A2DP and SCO HAL implementations can't handle @@ -623,13 +625,12 @@ private: sp<NBLog::Writer> mFastMixerNBLogWriter; public: virtual bool hasFastMixer() const = 0; - virtual FastTrackUnderruns getFastTrackUnderruns(size_t fastIndex) const + virtual FastTrackUnderruns getFastTrackUnderruns(size_t fastIndex __unused) const { FastTrackUnderruns dummy; return dummy; } protected: // accessed by both binder threads and within threadLoop(), lock on mutex needed unsigned mFastTrackAvailMask; // bit i set if fast track [i] is available - virtual void flushOutput_l(); private: // timestamp latch: @@ -748,11 +749,11 @@ protected: // threadLoop snippets virtual mixer_state prepareTracks_l(Vector< sp<Track> > *tracksToRemove); virtual void threadLoop_exit(); - virtual void flushOutput_l(); virtual bool waitingAsyncCallback(); virtual bool waitingAsyncCallback_l(); virtual bool shouldStandby_l(); + virtual void onAddNewTrack_l(); private: void flushHw_l(); @@ -867,23 +868,23 @@ public: // Thread virtuals virtual bool threadLoop(); - virtual status_t readyToRun(); // RefBase virtual void onFirstRef(); virtual status_t initCheck() const { return (mInput == NULL) ? NO_INIT : NO_ERROR; } + sp<AudioFlinger::RecordThread::RecordTrack> createRecordTrack_l( const sp<AudioFlinger::Client>& client, uint32_t sampleRate, audio_format_t format, audio_channel_mask_t channelMask, - size_t frameCount, + size_t *pFrameCount, int sessionId, int uid, IAudioFlinger::track_flags_t *flags, pid_t tid, - status_t *status); + status_t *status /*non-NULL*/); status_t start(RecordTrack* recordTrack, AudioSystem::sync_event_t event, @@ -905,7 +906,7 @@ public: virtual String8 getParameters(const String8& keys); virtual void audioConfigChanged_l(int event, int param = 0); void readInputParameters(); - virtual unsigned int getInputFramesLost(); + virtual uint32_t getInputFramesLost(); virtual status_t addEffectChain_l(const sp<EffectChain>& chain); virtual size_t removeEffectChain_l(const sp<EffectChain>& chain); @@ -926,30 +927,43 @@ public: bool hasFastRecorder() const { return false; } private: - void clearSyncStartEvent(); + void clearSyncStartEvent(); // Enter standby if not already in standby, and set mStandby flag - void standby(); + void standbyIfNotAlreadyInStandby(); // Call the HAL standby method unconditionally, and don't change mStandby flag - void inputStandBy(); + void inputStandBy(); AudioStreamIn *mInput; SortedVector < sp<RecordTrack> > mTracks; - // mActiveTrack has dual roles: it indicates the current active track, and + // mActiveTracks has dual roles: it indicates the current active track(s), and // is used together with mStartStopCond to indicate start()/stop() progress - sp<RecordTrack> mActiveTrack; + SortedVector< sp<RecordTrack> > mActiveTracks; + // generation counter for mActiveTracks + int mActiveTracksGen; Condition mStartStopCond; // updated by RecordThread::readInputParameters() AudioResampler *mResampler; // interleaved stereo pairs of fixed-point signed Q19.12 int32_t *mRsmpOutBuffer; - int16_t *mRsmpInBuffer; // [mFrameCount * mChannelCount] - size_t mRsmpInIndex; - size_t mBufferSize; // stream buffer size for read() + + // resampler converts input at HAL Hz to output at AudioRecord client Hz + int16_t *mRsmpInBuffer; // see new[] for details on the size + size_t mRsmpInFrames; // size of resampler input in frames + size_t mRsmpInFramesP2;// size rounded up to a power-of-2 + size_t mRsmpInUnrel; // unreleased frames remaining from + // most recent getNextBuffer + // these are rolling counters that are never cleared + int32_t mRsmpInFront; // next available frame + int32_t mRsmpInRear; // last filled frame + 1 + size_t mRsmpInIndex; // FIXME legacy + + // client's requested configuration, which may differ from the HAL configuration const uint32_t mReqChannelCount; const uint32_t mReqSampleRate; + ssize_t mBytesRead; // sync event triggering actual audio capture. Frames read before this event will // be dropped and therefore not read by the application. diff --git a/services/audioflinger/TrackBase.h b/services/audioflinger/TrackBase.h index cd201d9..05fde7c 100644 --- a/services/audioflinger/TrackBase.h +++ b/services/audioflinger/TrackBase.h @@ -48,6 +48,7 @@ public: int uid, bool isOut); virtual ~TrackBase(); + virtual status_t initCheck() const { return getCblk() != 0 ? NO_ERROR : NO_MEMORY; } virtual status_t start(AudioSystem::sync_event_t event, int triggerSession) = 0; @@ -78,15 +79,6 @@ protected: virtual uint32_t sampleRate() const { return mSampleRate; } - // Return a pointer to the start of a contiguous slice of the track buffer. - // Parameter 'offset' is the requested start position, expressed in - // monotonically increasing frame units relative to the track epoch. - // Parameter 'frames' is the requested length, also in frame units. - // Always returns non-NULL. It is the caller's responsibility to - // verify that this will be successful; the result of calling this - // function with invalid 'offset' or 'frames' is undefined. - void* getBuffer(uint32_t offset, uint32_t frames) const; - bool isStopped() const { return (mState == STOPPED || mState == FLUSHED); } diff --git a/services/audioflinger/Tracks.cpp b/services/audioflinger/Tracks.cpp index fccc7b8..e5152b8 100644 --- a/services/audioflinger/Tracks.cpp +++ b/services/audioflinger/Tracks.cpp @@ -116,12 +116,11 @@ AudioFlinger::ThreadBase::TrackBase::TrackBase( if (client != 0) { mCblkMemory = client->heap()->allocate(size); - if (mCblkMemory != 0) { - mCblk = static_cast<audio_track_cblk_t *>(mCblkMemory->pointer()); - // can't assume mCblk != NULL - } else { + if (mCblkMemory == 0 || + (mCblk = static_cast<audio_track_cblk_t *>(mCblkMemory->pointer())) == NULL) { ALOGE("not enough memory for AudioTrack size=%u", size); client->heap()->dump("AudioTrack"); + mCblkMemory.clear(); return; } } else { @@ -134,7 +133,6 @@ AudioFlinger::ThreadBase::TrackBase::TrackBase( if (mCblk != NULL) { new(mCblk) audio_track_cblk_t(); // clear all buffers - mCblk->frameCount_ = frameCount; if (sharedBuffer == 0) { mBuffer = (char*)mCblk + sizeof(audio_track_cblk_t); memset(mBuffer, 0, bufferSize); @@ -148,7 +146,7 @@ AudioFlinger::ThreadBase::TrackBase::TrackBase( #ifdef TEE_SINK if (mTeeSinkTrackEnabled) { NBAIO_Format pipeFormat = Format_from_SR_C(mSampleRate, mChannelCount); - if (pipeFormat != Format_Invalid) { + if (Format_isValid(pipeFormat)) { Pipe *pipe = new Pipe(mTeeSinkTrackFrames, pipeFormat); size_t numCounterOffers = 0; const NBAIO_Format offers[1] = {pipeFormat}; @@ -275,6 +273,11 @@ status_t AudioFlinger::TrackHandle::queueTimedBuffer(const sp<IMemory>& buffer, if (!mTrack->isTimedTrack()) return INVALID_OPERATION; + if (buffer == 0 || buffer->pointer() == NULL) { + ALOGE("queueTimedBuffer() buffer is 0 or has NULL pointer()"); + return BAD_VALUE; + } + PlaybackThread::TimedTrack* tt = reinterpret_cast<PlaybackThread::TimedTrack*>(mTrack.get()); return tt->queueTimedBuffer(buffer, pts); @@ -344,7 +347,8 @@ AudioFlinger::PlaybackThread::Track::Track( mCachedVolume(1.0), mIsInvalid(false), mAudioTrackServerProxy(NULL), - mResumeToStopping(false) + mResumeToStopping(false), + mFlushHwPending(false) { if (mCblk != NULL) { if (sharedBuffer == 0) { @@ -396,6 +400,15 @@ AudioFlinger::PlaybackThread::Track::~Track() } } +status_t AudioFlinger::PlaybackThread::Track::initCheck() const +{ + status_t status = TrackBase::initCheck(); + if (status == NO_ERROR && mName < 0) { + status = NO_MEMORY; + } + return status; +} + void AudioFlinger::PlaybackThread::Track::destroy() { // NOTE: destroyTrack_l() can remove a strong reference to this Track @@ -422,17 +435,19 @@ void AudioFlinger::PlaybackThread::Track::destroy() /*static*/ void AudioFlinger::PlaybackThread::Track::appendDumpHeader(String8& result) { - result.append(" Name Client Type Fmt Chn mask Session fCount S F SRate " + result.append(" Name Active Client Type Fmt Chn mask Session fCount S F SRate " "L dB R dB Server Main buf Aux Buf Flags UndFrmCnt\n"); } -void AudioFlinger::PlaybackThread::Track::dump(char* buffer, size_t size) +void AudioFlinger::PlaybackThread::Track::dump(char* buffer, size_t size, bool active) { uint32_t vlr = mAudioTrackServerProxy->getVolumeLR(); if (isFastTrack()) { - sprintf(buffer, " F %2d", mFastIndex); + sprintf(buffer, " F %2d", mFastIndex); + } else if (mName >= AudioMixer::TRACK0) { + sprintf(buffer, " %4d", mName - AudioMixer::TRACK0); } else { - sprintf(buffer, " %4d", mName - AudioMixer::TRACK0); + sprintf(buffer, " none"); } track_state state = mState; char stateChar; @@ -487,8 +502,9 @@ void AudioFlinger::PlaybackThread::Track::dump(char* buffer, size_t size) nowInUnderrun = '?'; break; } - snprintf(&buffer[7], size-7, " %6u %4u %08X %08X %7u %6zu %1c %1d %5u %5.2g %5.2g " + snprintf(&buffer[8], size-8, " %6s %6u %4u %08X %08X %7u %6zu %1c %1d %5u %5.2g %5.2g " "%08X %p %p 0x%03X %9u%c\n", + active ? "yes" : "no", (mClient == 0) ? getpid_cached : mClient->pid(), mStreamType, mFormat, @@ -514,7 +530,7 @@ uint32_t AudioFlinger::PlaybackThread::Track::sampleRate() const { // AudioBufferProvider interface status_t AudioFlinger::PlaybackThread::Track::getNextBuffer( - AudioBufferProvider::Buffer* buffer, int64_t pts) + AudioBufferProvider::Buffer* buffer, int64_t pts __unused) { ServerProxy::Buffer buf; size_t desiredFrames = buffer->frameCount; @@ -551,7 +567,7 @@ size_t AudioFlinger::PlaybackThread::Track::framesReleased() const // Don't call for fast tracks; the framesReady() could result in priority inversion bool AudioFlinger::PlaybackThread::Track::isReady() const { - if (mFillingUpStatus != FS_FILLING || isStopped() || isPausing()) { + if (mFillingUpStatus != FS_FILLING || isStopped() || isPausing() || isStopping()) { return true; } @@ -564,8 +580,8 @@ bool AudioFlinger::PlaybackThread::Track::isReady() const { return false; } -status_t AudioFlinger::PlaybackThread::Track::start(AudioSystem::sync_event_t event, - int triggerSession) +status_t AudioFlinger::PlaybackThread::Track::start(AudioSystem::sync_event_t event __unused, + int triggerSession __unused) { status_t status = NO_ERROR; ALOGV("start(%d), calling pid %d session %d", @@ -719,6 +735,7 @@ void AudioFlinger::PlaybackThread::Track::flush() mRetryCount = PlaybackThread::kMaxTrackRetriesOffload; } + mFlushHwPending = true; mResumeToStopping = false; } else { if (mState != STOPPING_1 && mState != STOPPING_2 && mState != STOPPED && @@ -739,11 +756,19 @@ void AudioFlinger::PlaybackThread::Track::flush() // Prevent flush being lost if the track is flushed and then resumed // before mixer thread can run. This is important when offloading // because the hardware buffer could hold a large amount of audio - playbackThread->flushOutput_l(); playbackThread->broadcast_l(); } } +// must be called with thread lock held +void AudioFlinger::PlaybackThread::Track::flushAck() +{ + if (!isOffloaded()) + return; + + mFlushHwPending = false; +} + void AudioFlinger::PlaybackThread::Track::reset() { // Do not reset twice to avoid discarding data written just after a flush and before @@ -979,7 +1004,8 @@ AudioFlinger::PlaybackThread::TimedTrack::create( size_t frameCount, const sp<IMemory>& sharedBuffer, int sessionId, - int uid) { + int uid) +{ if (!client->reserveTimedTrack()) return 0; @@ -1045,15 +1071,14 @@ status_t AudioFlinger::PlaybackThread::TimedTrack::allocateTimedBuffer( mTimedMemoryDealer = new MemoryDealer(kTimedBufferHeapSize, "AudioFlingerTimed"); - if (mTimedMemoryDealer == NULL) + if (mTimedMemoryDealer == NULL) { return NO_MEMORY; + } } sp<IMemory> newBuffer = mTimedMemoryDealer->allocate(size); - if (newBuffer == NULL) { - newBuffer = mTimedMemoryDealer->allocate(size); - if (newBuffer == NULL) - return NO_MEMORY; + if (newBuffer == 0 || newBuffer->pointer() == NULL) { + return NO_MEMORY; } *buffer = newBuffer; @@ -1152,7 +1177,7 @@ void AudioFlinger::PlaybackThread::TimedTrack::trimTimedBufferQueueHead_l( void AudioFlinger::PlaybackThread::TimedTrack::updateFramesPendingAfterTrim_l( const TimedBuffer& buf, - const char* logTag) { + const char* logTag __unused) { uint32_t bufBytes = buf.buffer()->size(); uint32_t consumedAlready = buf.position(); @@ -1504,9 +1529,9 @@ AudioFlinger::PlaybackThread::OutputTrack::OutputTrack( mOutBuffer.frameCount = 0; playbackThread->mTracks.add(this); ALOGV("OutputTrack constructor mCblk %p, mBuffer %p, " - "mCblk->frameCount_ %u, mChannelMask 0x%08x", + "frameCount %u, mChannelMask 0x%08x", mCblk, mBuffer, - mCblk->frameCount_, mChannelMask); + frameCount, mChannelMask); // since client and server are in the same process, // the buffer has the same virtual address on both sides mClientProxy = new AudioTrackClientProxy(mCblk, mBuffer, mFrameCount, mFrameSize); @@ -1764,9 +1789,7 @@ AudioFlinger::RecordThread::RecordTrack::RecordTrack( { ALOGV("RecordTrack constructor"); if (mCblk != NULL) { - mAudioRecordServerProxy = new AudioRecordServerProxy(mCblk, mBuffer, frameCount, - mFrameSize); - mServerProxy = mAudioRecordServerProxy; + mServerProxy = new AudioRecordServerProxy(mCblk, mBuffer, frameCount, mFrameSize); } } @@ -1777,7 +1800,7 @@ AudioFlinger::RecordThread::RecordTrack::~RecordTrack() // AudioBufferProvider interface status_t AudioFlinger::RecordThread::RecordTrack::getNextBuffer(AudioBufferProvider::Buffer* buffer, - int64_t pts) + int64_t pts __unused) { ServerProxy::Buffer buf; buf.mFrameCount = buffer->frameCount; @@ -1845,12 +1868,13 @@ void AudioFlinger::RecordThread::RecordTrack::invalidate() /*static*/ void AudioFlinger::RecordThread::RecordTrack::appendDumpHeader(String8& result) { - result.append("Client Fmt Chn mask Session S Server fCount\n"); + result.append(" Active Client Fmt Chn mask Session S Server fCount\n"); } -void AudioFlinger::RecordThread::RecordTrack::dump(char* buffer, size_t size) +void AudioFlinger::RecordThread::RecordTrack::dump(char* buffer, size_t size, bool active) { - snprintf(buffer, size, "%6u %3u %08X %7u %1d %08X %6zu\n", + snprintf(buffer, size, " %6s %6u %3u %08X %7u %1d %08X %6zu\n", + active ? "yes" : "no", (mClient == 0) ? getpid_cached : mClient->pid(), mFormat, mChannelMask, diff --git a/services/audioflinger/test-resample.cpp b/services/audioflinger/test-resample.cpp index 7a314cf..66fcd90 100644 --- a/services/audioflinger/test-resample.cpp +++ b/services/audioflinger/test-resample.cpp @@ -26,54 +26,30 @@ #include <errno.h> #include <time.h> #include <math.h> +#include <audio_utils/sndfile.h> using namespace android; -struct HeaderWav { - HeaderWav(size_t size, int nc, int sr, int bits) { - strncpy(RIFF, "RIFF", 4); - chunkSize = size + sizeof(HeaderWav); - strncpy(WAVE, "WAVE", 4); - strncpy(fmt, "fmt ", 4); - fmtSize = 16; - audioFormat = 1; - numChannels = nc; - samplesRate = sr; - byteRate = sr * numChannels * (bits/8); - align = nc*(bits/8); - bitsPerSample = bits; - strncpy(data, "data", 4); - dataSize = size; - } - - char RIFF[4]; // RIFF - uint32_t chunkSize; // File size - char WAVE[4]; // WAVE - char fmt[4]; // fmt\0 - uint32_t fmtSize; // fmt size - uint16_t audioFormat; // 1=PCM - uint16_t numChannels; // num channels - uint32_t samplesRate; // sample rate in hz - uint32_t byteRate; // Bps - uint16_t align; // 2=16-bit mono, 4=16-bit stereo - uint16_t bitsPerSample; // bits per sample - char data[4]; // "data" - uint32_t dataSize; // size -}; +bool gVerbose = false; static int usage(const char* name) { - fprintf(stderr,"Usage: %s [-p] [-h] [-s] [-q {dq|lq|mq|hq|vhq}] [-i input-sample-rate] " - "[-o output-sample-rate] [<input-file>] <output-file>\n", name); + fprintf(stderr,"Usage: %s [-p] [-h] [-v] [-s] [-q {dq|lq|mq|hq|vhq|dlq|dmq|dhq}]" + " [-i input-sample-rate] [-o output-sample-rate] [<input-file>]" + " <output-file>\n", name); fprintf(stderr," -p enable profiling\n"); fprintf(stderr," -h create wav file\n"); - fprintf(stderr," -s stereo\n"); + fprintf(stderr," -v verbose : log buffer provider calls\n"); + fprintf(stderr," -s stereo (ignored if input file is specified)\n"); fprintf(stderr," -q resampler quality\n"); fprintf(stderr," dq : default quality\n"); fprintf(stderr," lq : low quality\n"); fprintf(stderr," mq : medium quality\n"); fprintf(stderr," hq : high quality\n"); fprintf(stderr," vhq : very high quality\n"); - fprintf(stderr," -i input file sample rate\n"); + fprintf(stderr," dlq : dynamic low quality\n"); + fprintf(stderr," dmq : dynamic medium quality\n"); + fprintf(stderr," dhq : dynamic high quality\n"); + fprintf(stderr," -i input file sample rate (ignored if input file is specified)\n"); fprintf(stderr," -o output file sample rate\n"); return -1; } @@ -81,7 +57,8 @@ static int usage(const char* name) { int main(int argc, char* argv[]) { const char* const progname = argv[0]; - bool profiling = false; + bool profileResample = false; + bool profileFilter = false; bool writeHeader = false; int channels = 1; int input_freq = 0; @@ -89,14 +66,20 @@ int main(int argc, char* argv[]) { AudioResampler::src_quality quality = AudioResampler::DEFAULT_QUALITY; int ch; - while ((ch = getopt(argc, argv, "phsq:i:o:")) != -1) { + while ((ch = getopt(argc, argv, "pfhvsq:i:o:")) != -1) { switch (ch) { case 'p': - profiling = true; + profileResample = true; + break; + case 'f': + profileFilter = true; break; case 'h': writeHeader = true; break; + case 'v': + gVerbose = true; + break; case 's': channels = 2; break; @@ -111,6 +94,12 @@ int main(int argc, char* argv[]) { quality = AudioResampler::HIGH_QUALITY; else if (!strcmp(optarg, "vhq")) quality = AudioResampler::VERY_HIGH_QUALITY; + else if (!strcmp(optarg, "dlq")) + quality = AudioResampler::DYN_LOW_QUALITY; + else if (!strcmp(optarg, "dmq")) + quality = AudioResampler::DYN_MED_QUALITY; + else if (!strcmp(optarg, "dhq")) + quality = AudioResampler::DYN_HIGH_QUALITY; else { usage(progname); return -1; @@ -148,25 +137,22 @@ int main(int argc, char* argv[]) { size_t input_size; void* input_vaddr; if (argc == 2) { - struct stat st; - if (stat(file_in, &st) < 0) { - fprintf(stderr, "stat: %s\n", strerror(errno)); - return -1; - } - - int input_fd = open(file_in, O_RDONLY); - if (input_fd < 0) { - fprintf(stderr, "open: %s\n", strerror(errno)); - return -1; - } - - input_size = st.st_size; - input_vaddr = mmap(0, input_size, PROT_READ, MAP_PRIVATE, input_fd, 0); - if (input_vaddr == MAP_FAILED ) { - fprintf(stderr, "mmap: %s\n", strerror(errno)); - return -1; + SF_INFO info; + info.format = 0; + SNDFILE *sf = sf_open(file_in, SFM_READ, &info); + if (sf == NULL) { + perror(file_in); + return EXIT_FAILURE; } + input_size = info.frames * info.channels * sizeof(short); + input_vaddr = malloc(input_size); + (void) sf_readf_short(sf, (short *) input_vaddr, info.frames); + sf_close(sf); + channels = info.channels; + input_freq = info.samplerate; } else { + // data for testing is exactly (input sampling rate/1000)/2 seconds + // so 44.1khz input is 22.05 seconds double k = 1000; // Hz / s double time = (input_freq / 2) / k; size_t input_frames = size_t(input_freq * time); @@ -178,7 +164,7 @@ int main(int argc, char* argv[]) { double y = sin(M_PI * k * t * t); int16_t yi = floor(y * 32767.0 + 0.5); for (size_t j=0 ; j<(size_t)channels ; j++) { - in[i*channels + j] = yi / (1+j); + in[i*channels + j] = yi / (1+j); // right ch. 1/2 left ch. } } } @@ -186,89 +172,238 @@ int main(int argc, char* argv[]) { // ---------------------------------------------------------- class Provider: public AudioBufferProvider { - int16_t* mAddr; - size_t mNumFrames; + int16_t* const mAddr; // base address + const size_t mNumFrames; // total frames + const int mChannels; + size_t mNextFrame; // index of next frame to provide + size_t mUnrel; // number of frames not yet released public: - Provider(const void* addr, size_t size, int channels) { - mAddr = (int16_t*) addr; - mNumFrames = size / (channels*sizeof(int16_t)); + Provider(const void* addr, size_t size, int channels) + : mAddr((int16_t*) addr), + mNumFrames(size / (channels*sizeof(int16_t))), + mChannels(channels), + mNextFrame(0), mUnrel(0) { } virtual status_t getNextBuffer(Buffer* buffer, int64_t pts = kInvalidPTS) { - buffer->frameCount = mNumFrames; - buffer->i16 = mAddr; - return NO_ERROR; + (void)pts; // suppress warning + size_t requestedFrames = buffer->frameCount; + if (requestedFrames > mNumFrames - mNextFrame) { + buffer->frameCount = mNumFrames - mNextFrame; + } + if (gVerbose) { + printf("getNextBuffer() requested %u frames out of %u frames available," + " and returned %u frames\n", + requestedFrames, mNumFrames - mNextFrame, buffer->frameCount); + } + mUnrel = buffer->frameCount; + if (buffer->frameCount > 0) { + buffer->i16 = &mAddr[mChannels * mNextFrame]; + return NO_ERROR; + } else { + buffer->i16 = NULL; + return NOT_ENOUGH_DATA; + } } virtual void releaseBuffer(Buffer* buffer) { + if (buffer->frameCount > mUnrel) { + fprintf(stderr, "ERROR releaseBuffer() released %u frames but only %u available " + "to release\n", buffer->frameCount, mUnrel); + mNextFrame += mUnrel; + mUnrel = 0; + } else { + if (gVerbose) { + printf("releaseBuffer() released %u frames out of %u frames available " + "to release\n", buffer->frameCount, mUnrel); + } + mNextFrame += buffer->frameCount; + mUnrel -= buffer->frameCount; + } + buffer->frameCount = 0; + buffer->i16 = NULL; + } + void reset() { + mNextFrame = 0; } } provider(input_vaddr, input_size, channels); size_t input_frames = input_size / (channels * sizeof(int16_t)); + if (gVerbose) { + printf("%u input frames\n", input_frames); + } size_t output_size = 2 * 4 * ((int64_t) input_frames * output_freq) / input_freq; output_size &= ~7; // always stereo, 32-bits - void* output_vaddr = malloc(output_size); - - if (profiling) { + if (profileFilter) { + // Check how fast sample rate changes are that require filter changes. + // The delta sample rate changes must indicate a downsampling ratio, + // and must be larger than 10% changes. + // + // On fast devices, filters should be generated between 0.1ms - 1ms. + // (single threaded). AudioResampler* resampler = AudioResampler::create(16, channels, - output_freq, quality); - - size_t out_frames = output_size/8; - resampler->setSampleRate(input_freq); - resampler->setVolume(0x1000, 0x1000); - - memset(output_vaddr, 0, output_size); + 8000, quality); + int looplimit = 100; timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); - resampler->resample((int*) output_vaddr, out_frames, &provider); - resampler->resample((int*) output_vaddr, out_frames, &provider); - resampler->resample((int*) output_vaddr, out_frames, &provider); - resampler->resample((int*) output_vaddr, out_frames, &provider); + for (int i = 0; i < looplimit; ++i) { + resampler->setSampleRate(9000); + resampler->setSampleRate(12000); + resampler->setSampleRate(20000); + resampler->setSampleRate(30000); + } clock_gettime(CLOCK_MONOTONIC, &end); int64_t start_ns = start.tv_sec * 1000000000LL + start.tv_nsec; int64_t end_ns = end.tv_sec * 1000000000LL + end.tv_nsec; - int64_t time = (end_ns - start_ns)/4; - printf("%f Mspl/s\n", out_frames/(time/1e9)/1e6); + int64_t time = end_ns - start_ns; + printf("%.2f sample rate changes with filter calculation/sec\n", + looplimit * 4 / (time / 1e9)); + // Check how fast sample rate changes are without filter changes. + // This should be very fast, probably 0.1us - 1us per sample rate + // change. + resampler->setSampleRate(1000); + looplimit = 1000; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < looplimit; ++i) { + resampler->setSampleRate(1000+i); + } + clock_gettime(CLOCK_MONOTONIC, &end); + start_ns = start.tv_sec * 1000000000LL + start.tv_nsec; + end_ns = end.tv_sec * 1000000000LL + end.tv_nsec; + time = end_ns - start_ns; + printf("%.2f sample rate changes without filter calculation/sec\n", + looplimit / (time / 1e9)); + resampler->reset(); delete resampler; } + void* output_vaddr = malloc(output_size); AudioResampler* resampler = AudioResampler::create(16, channels, output_freq, quality); size_t out_frames = output_size/8; + + /* set volume precision to 12 bits, so the volume scale is 1<<12. + * This means the "integer" part fits in the Q19.12 precision + * representation of output int32_t. + * + * Generally 0 < volumePrecision <= 14 (due to the limits of + * int16_t values for Volume). volumePrecision cannot be 0 due + * to rounding and shifts. + */ + const int volumePrecision = 12; // in bits + resampler->setSampleRate(input_freq); - resampler->setVolume(0x1000, 0x1000); + resampler->setVolume(1 << volumePrecision, 1 << volumePrecision); + + if (profileResample) { + /* + * For profiling on mobile devices, upon experimentation + * it is better to run a few trials with a shorter loop limit, + * and take the minimum time. + * + * Long tests can cause CPU temperature to build up and thermal throttling + * to reduce CPU frequency. + * + * For frequency checks (index=0, or 1, etc.): + * "cat /sys/devices/system/cpu/cpu${index}/cpufreq/scaling_*_freq" + * + * For temperature checks (index=0, or 1, etc.): + * "cat /sys/class/thermal/thermal_zone${index}/temp" + * + * Another way to avoid thermal throttling is to fix the CPU frequency + * at a lower level which prevents excessive temperatures. + */ + const int trials = 4; + const int looplimit = 4; + timespec start, end; + int64_t time; + + for (int n = 0; n < trials; ++n) { + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < looplimit; ++i) { + resampler->resample((int*) output_vaddr, out_frames, &provider); + provider.reset(); // during benchmarking reset only the provider + } + clock_gettime(CLOCK_MONOTONIC, &end); + int64_t start_ns = start.tv_sec * 1000000000LL + start.tv_nsec; + int64_t end_ns = end.tv_sec * 1000000000LL + end.tv_nsec; + int64_t diff_ns = end_ns - start_ns; + if (n == 0 || diff_ns < time) { + time = diff_ns; // save the best out of our trials. + } + } + // Mfrms/s is "Millions of output frames per second". + printf("quality: %d channels: %d msec: %lld Mfrms/s: %.2lf\n", + quality, channels, time/1000000, out_frames * looplimit / (time / 1e9) / 1e6); + resampler->reset(); + } memset(output_vaddr, 0, output_size); + if (gVerbose) { + printf("resample() %u output frames\n", out_frames); + } resampler->resample((int*) output_vaddr, out_frames, &provider); + if (gVerbose) { + printf("resample() complete\n"); + } + resampler->reset(); + if (gVerbose) { + printf("reset() complete\n"); + } + delete resampler; + resampler = NULL; - // down-mix (we just truncate and keep the left channel) + // mono takes left channel only + // stereo right channel is half amplitude of stereo left channel (due to input creation) int32_t* out = (int32_t*) output_vaddr; int16_t* convert = (int16_t*) malloc(out_frames * channels * sizeof(int16_t)); + + // round to half towards zero and saturate at int16 (non-dithered) + const int roundVal = (1<<(volumePrecision-1)) - 1; // volumePrecision > 0 + for (size_t i = 0; i < out_frames; i++) { - for (int j=0 ; j<channels ; j++) { - int32_t s = out[i * 2 + j] >> 12; - if (s > 32767) s = 32767; - else if (s < -32768) s = -32768; + for (int j = 0; j < channels; j++) { + int32_t s = out[i * 2 + j] + roundVal; // add offset here + if (s < 0) { + s = (s + 1) >> volumePrecision; // round to 0 + if (s < -32768) { + s = -32768; + } + } else { + s = s >> volumePrecision; + if (s > 32767) { + s = 32767; + } + } convert[i * channels + j] = int16_t(s); } } // write output to disk - int output_fd = open(file_out, O_WRONLY | O_CREAT | O_TRUNC, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); - if (output_fd < 0) { - fprintf(stderr, "open: %s\n", strerror(errno)); - return -1; - } - if (writeHeader) { - HeaderWav wav(out_frames * channels * sizeof(int16_t), channels, output_freq, 16); - write(output_fd, &wav, sizeof(wav)); + SF_INFO info; + info.frames = 0; + info.samplerate = output_freq; + info.channels = channels; + info.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16; + SNDFILE *sf = sf_open(file_out, SFM_WRITE, &info); + if (sf == NULL) { + perror(file_out); + return EXIT_FAILURE; + } + (void) sf_writef_short(sf, convert, out_frames); + sf_close(sf); + } else { + int output_fd = open(file_out, O_WRONLY | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (output_fd < 0) { + perror(file_out); + return EXIT_FAILURE; + } + write(output_fd, convert, out_frames * channels * sizeof(int16_t)); + close(output_fd); } - write(output_fd, convert, out_frames * channels * sizeof(int16_t)); - close(output_fd); - - return 0; + return EXIT_SUCCESS; } |