diff options
-rw-r--r-- | services/audioflinger/FastMixer.cpp | 130 | ||||
-rw-r--r-- | services/audioflinger/FastMixer.h | 20 |
2 files changed, 128 insertions, 22 deletions
diff --git a/services/audioflinger/FastMixer.cpp b/services/audioflinger/FastMixer.cpp index bf264be..d499f7a 100644 --- a/services/audioflinger/FastMixer.cpp +++ b/services/audioflinger/FastMixer.cpp @@ -23,6 +23,7 @@ #include <system/audio.h> #ifdef FAST_MIXER_STATISTICS #include <cpustats/CentralTendencyStatistics.h> +#include <cpustats/ThreadCpuUsage.h> #endif #include "AudioMixer.h" #include "FastMixer.h" @@ -65,8 +66,11 @@ bool FastMixer::threadLoop() FastMixerDumpState dummyDumpState, *dumpState = &dummyDumpState; bool ignoreNextOverrun = true; // used to ignore initial overrun and first after an underrun #ifdef FAST_MIXER_STATISTICS - CentralTendencyStatistics cts; // cycle times in seconds - static const unsigned kMaxSamples = 1000; + struct timespec oldLoad = {0, 0}; // previous value of clock_gettime(CLOCK_THREAD_CPUTIME_ID) + bool oldLoadValid = false; // whether oldLoad is valid + uint32_t bounds = 0; + bool full = false; // whether we have collected at least kSamplingN samples + ThreadCpuUsage tcu; // for reading the current CPU clock frequency in kHz #endif unsigned coldGen = 0; // last observed mColdGen bool isWarm = false; // true means ready to mix, false means wait for warmup before mixing @@ -116,6 +120,7 @@ bool FastMixer::threadLoop() preIdle = *current; current = &preIdle; oldTsValid = false; + oldLoadValid = false; ignoreNextOverrun = true; } previous = current; @@ -151,6 +156,8 @@ bool FastMixer::threadLoop() warmupCycles = 0; sleepNs = -1; coldGen = current->mColdGen; + bounds = 0; + full = false; } else { sleepNs = FAST_HOT_IDLE_NS; } @@ -451,15 +458,54 @@ bool FastMixer::threadLoop() ignoreNextOverrun = false; } #ifdef FAST_MIXER_STATISTICS - // long-term statistics - cts.sample(sec + nsec * 1e-9); - if (cts.n() >= kMaxSamples) { - dumpState->mMean = cts.mean(); - dumpState->mMinimum = cts.minimum(); - dumpState->mMaximum = cts.maximum(); - dumpState->mStddev = cts.stddev(); - cts.reset(); + // advance the FIFO queue bounds + size_t i = bounds & (FastMixerDumpState::kSamplingN - 1); + bounds = (bounds + 1) & 0xFFFF; + if (full) { + bounds += 0x10000; + } else if (!(bounds & (FastMixerDumpState::kSamplingN - 1))) { + full = true; } + // compute the delta value of clock_gettime(CLOCK_MONOTONIC) + uint32_t monotonicNs = nsec; + if (sec > 0 && sec < 4) { + monotonicNs += sec * 1000000000; + } + // compute the raw CPU load = delta value of clock_gettime(CLOCK_THREAD_CPUTIME_ID) + uint32_t loadNs = 0; + struct timespec newLoad; + rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &newLoad); + if (rc == 0) { + if (oldLoadValid) { + sec = newLoad.tv_sec - oldLoad.tv_sec; + nsec = newLoad.tv_nsec - oldLoad.tv_nsec; + if (nsec < 0) { + --sec; + nsec += 1000000000; + } + loadNs = nsec; + if (sec > 0 && sec < 4) { + loadNs += sec * 1000000000; + } + } else { + // first time through the loop + oldLoadValid = true; + } + oldLoad = newLoad; + } + // get the absolute value of CPU clock frequency in kHz + int cpuNum = sched_getcpu(); + uint32_t kHz = tcu.getCpukHz(cpuNum); + kHz = (kHz & ~0xF) | (cpuNum & 0xF); + // save values in FIFO queues for dumpsys + // these stores #1, #2, #3 are not atomic with respect to each other, + // or with respect to store #4 below + dumpState->mMonotonicNs[i] = monotonicNs; + dumpState->mLoadNs[i] = loadNs; + dumpState->mCpukHz[i] = kHz; + // this store #4 is not atomic with respect to stores #1, #2, #3 above, but + // the newest open and oldest closed halves are atomic with respect to each other + dumpState->mBounds = bounds; #endif } else { // first time through the loop @@ -474,6 +520,7 @@ bool FastMixer::threadLoop() sleepNs = periodNs; } + } // for (;;) // never return 'true'; Thread::_threadLoop() locks mutex which can result in priority inversion @@ -484,11 +531,16 @@ FastMixerDumpState::FastMixerDumpState() : mNumTracks(0), mWriteErrors(0), mUnderruns(0), mOverruns(0), mSampleRate(0), mFrameCount(0), /* mMeasuredWarmupTs({0, 0}), */ mWarmupCycles(0) #ifdef FAST_MIXER_STATISTICS - , mMean(0.0), mMinimum(0.0), mMaximum(0.0), mStddev(0.0) + , mBounds(0) #endif { mMeasuredWarmupTs.tv_sec = 0; mMeasuredWarmupTs.tv_nsec = 0; + // sample arrays aren't accessed atomically with respect to the bounds, + // so clearing reduces chance for dumpsys to read random uninitialized samples + memset(&mMonotonicNs, 0, sizeof(mMonotonicNs)); + memset(&mLoadNs, 0, sizeof(mLoadNs)); + memset(&mCpukHz, 0, sizeof(mCpukHz)); } FastMixerDumpState::~FastMixerDumpState() @@ -525,17 +577,63 @@ void FastMixerDumpState::dump(int fd) snprintf(string, COMMAND_MAX, "%d", mCommand); break; } - double mMeasuredWarmupMs = (mMeasuredWarmupTs.tv_sec * 1000.0) + + double measuredWarmupMs = (mMeasuredWarmupTs.tv_sec * 1000.0) + (mMeasuredWarmupTs.tv_nsec / 1000000.0); + double mixPeriodSec = (double) mFrameCount / (double) mSampleRate; fdprintf(fd, "FastMixer command=%s writeSequence=%u framesWritten=%u\n" " numTracks=%u writeErrors=%u underruns=%u overruns=%u\n" - " sampleRate=%u frameCount=%u measuredWarmup=%.3g ms, warmupCycles=%u\n", + " sampleRate=%u frameCount=%u measuredWarmup=%.3g ms, warmupCycles=%u\n" + " mixPeriod=%.2f ms\n", string, mWriteSequence, mFramesWritten, mNumTracks, mWriteErrors, mUnderruns, mOverruns, - mSampleRate, mFrameCount, mMeasuredWarmupMs, mWarmupCycles); + mSampleRate, mFrameCount, measuredWarmupMs, mWarmupCycles, + mixPeriodSec * 1e3); #ifdef FAST_MIXER_STATISTICS - fdprintf(fd, " cycle time in ms: mean=%.1f min=%.1f max=%.1f stddev=%.1f\n", - mMean*1e3, mMinimum*1e3, mMaximum*1e3, mStddev*1e3); + // find the interval of valid samples + uint32_t bounds = mBounds; + uint32_t newestOpen = bounds & 0xFFFF; + uint32_t oldestClosed = bounds >> 16; + uint32_t n = (newestOpen - oldestClosed) & 0xFFFF; + if (n > kSamplingN) { + ALOGE("too many samples %u", n); + n = kSamplingN; + } + // statistics for monotonic (wall clock) time, thread raw CPU load in time, CPU clock frequency, + // and adjusted CPU load in MHz normalized for CPU clock frequency + CentralTendencyStatistics wall, loadNs, kHz, loadMHz; + // only compute adjusted CPU load in Hz if current CPU number and CPU clock frequency are stable + bool valid = false; + uint32_t previousCpukHz = 0; + // loop over all the samples + for (; n > 0; --n) { + size_t i = oldestClosed++ & (kSamplingN - 1); + uint32_t wallNs = mMonotonicNs[i]; + wall.sample(wallNs); + uint32_t sampleLoadNs = mLoadNs[i]; + uint32_t sampleCpukHz = mCpukHz[i]; + loadNs.sample(sampleLoadNs); + kHz.sample(sampleCpukHz & ~0xF); + if (sampleCpukHz == previousCpukHz) { + double megacycles = (double) sampleLoadNs * (double) sampleCpukHz; + double adjMHz = megacycles / mixPeriodSec; // _not_ wallNs * 1e9 + loadMHz.sample(adjMHz); + } + previousCpukHz = sampleCpukHz; + } + fdprintf(fd, "Simple moving statistics over last %.1f seconds:\n", wall.n() * mixPeriodSec); + fdprintf(fd, " wall clock time in ms per mix cycle:\n" + " mean=%.2f min=%.2f max=%.2f stddev=%.2f\n", + wall.mean()*1e-6, wall.minimum()*1e-6, wall.maximum()*1e-6, wall.stddev()*1e-6); + fdprintf(fd, " raw CPU load in us per mix cycle:\n" + " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", + loadNs.mean()*1e-3, loadNs.minimum()*1e-3, loadNs.maximum()*1e-3, + loadNs.stddev()*1e-3); + fdprintf(fd, " CPU clock frequency in MHz:\n" + " mean=%.0f min=%.0f max=%.0f stddev=%.0f\n", + kHz.mean()*1e-3, kHz.minimum()*1e-3, kHz.maximum()*1e-3, kHz.stddev()*1e-3); + fdprintf(fd, " adjusted CPU load in MHz (i.e. normalized for CPU clock frequency):\n" + " mean=%.1f min=%.1f max=%.1f stddev=%.1f\n", + loadMHz.mean(), loadMHz.minimum(), loadMHz.maximum(), loadMHz.stddev()); #endif } diff --git a/services/audioflinger/FastMixer.h b/services/audioflinger/FastMixer.h index a6dd310..e2ed553 100644 --- a/services/audioflinger/FastMixer.h +++ b/services/audioflinger/FastMixer.h @@ -64,7 +64,7 @@ struct FastMixerDumpState { FastMixerDumpState(); /*virtual*/ ~FastMixerDumpState(); - void dump(int fd); + void dump(int fd); // should only be called on a stable copy, not the original FastMixerState::Command mCommand; // current command uint32_t mWriteSequence; // incremented before and after each write() @@ -78,12 +78,20 @@ struct FastMixerDumpState { struct timespec mMeasuredWarmupTs; // measured warmup time uint32_t mWarmupCycles; // number of loop cycles required to warmup FastTrackDump mTracks[FastMixerState::kMaxFastTracks]; + #ifdef FAST_MIXER_STATISTICS - // cycle times in seconds - float mMean; - float mMinimum; - float mMaximum; - float mStddev; + // Recently collected samples of per-cycle monotonic time, thread CPU time, and CPU frequency. + // kSamplingN is the size of the sampling frame, and must be a power of 2 <= 0x8000. + static const uint32_t kSamplingN = 0x1000; + // The bounds define the interval of valid samples, and are represented as follows: + // newest open (excluded) endpoint = lower 16 bits of bounds, modulo N + // oldest closed (included) endpoint = upper 16 bits of bounds, modulo N + // Number of valid samples is newest - oldest. + uint32_t mBounds; // bounds for mMonotonicNs, mThreadCpuNs, and mCpukHz + // The elements in the *Ns arrays are in units of nanoseconds <= 3999999999. + uint32_t mMonotonicNs[kSamplingN]; // delta monotonic (wall clock) time + uint32_t mLoadNs[kSamplingN]; // delta CPU load in time + uint32_t mCpukHz[kSamplingN]; // absolute CPU clock frequency in kHz, bits 0-3 are CPU# #endif }; |