diff options
author | Christopher Tate <ctate@google.com> | 2013-10-23 17:28:27 -0700 |
---|---|---|
committer | Christopher Tate <ctate@google.com> | 2013-10-24 10:46:28 -0700 |
commit | e6f81cf1f69e0683f969238f921950befba8e6c3 (patch) | |
tree | d845ebcc18cc2e941e33c47b0e37a0ad116cb58f /services/java/com/android/server/Watchdog.java | |
parent | d9e98a4b6d3181ab3d58b7781ee2f88a389ecf1f (diff) | |
download | frameworks_base-e6f81cf1f69e0683f969238f921950befba8e6c3.zip frameworks_base-e6f81cf1f69e0683f969238f921950befba8e6c3.tar.gz frameworks_base-e6f81cf1f69e0683f969238f921950befba8e6c3.tar.bz2 |
Support different watchdog timeouts for different entities
We need to be able to perform very lengthy operations on some threads
(e.g. the I/O thread responsible for installing multi-gigabyte APKs) but
still have long-run deadlock/hang detection applied to those threads.
Previously the watchdog mechanism applied the same policy to all
monitored threads: unresponsive after 60 seconds => restart the system.
Now, each monitored entity can have its own independent timeout after
which the watchdog declares deadlock and restarts the runtime. The
intermediate thread-stack dump is now taken once a monitored entity has
been unresponsive for half of its own declared timeout, rather than at
the global 30-second checking interval.
With that new mechanism in place, the Package Manager's lengthy-I/O
thread watchdog timeout is raised to 10 minutes.
Bug 11278188
Change-Id: I512599260009c31416b2385f778681e5b9597f05
Diffstat (limited to 'services/java/com/android/server/Watchdog.java')
-rw-r--r-- | services/java/com/android/server/Watchdog.java | 113 |
1 files changed, 77 insertions, 36 deletions
diff --git a/services/java/com/android/server/Watchdog.java b/services/java/com/android/server/Watchdog.java index 616090e..8054788 100644 --- a/services/java/com/android/server/Watchdog.java +++ b/services/java/com/android/server/Watchdog.java @@ -58,7 +58,14 @@ public class Watchdog extends Thread { // Set this to true to have the watchdog record kernel thread stacks when it fires static final boolean RECORD_KERNEL_THREADS = true; - static final int TIME_TO_WAIT = DB ? 5*1000 : 30*1000; + static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000; + static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2; + + // These are temporally ordered: larger values as lateness increases + static final int COMPLETED = 0; + static final int WAITING = 1; + static final int WAITED_HALF = 2; + static final int OVERDUE = 3; static final String[] NATIVE_STACKS_OF_INTEREST = new String[] { "/system/bin/mediaserver", @@ -87,13 +94,17 @@ public class Watchdog extends Thread { public final class HandlerChecker implements Runnable { private final Handler mHandler; private final String mName; + private final long mWaitMax; private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>(); private boolean mCompleted; private Monitor mCurrentMonitor; + private long mStartTime; - HandlerChecker(Handler handler, String name) { + HandlerChecker(Handler handler, String name, long waitMaxMillis) { mHandler = handler; mName = name; + mWaitMax = waitMaxMillis; + mCompleted = true; } public void addMonitor(Monitor monitor) { @@ -111,13 +122,34 @@ public class Watchdog extends Thread { mCompleted = true; return; } + + if (!mCompleted) { + // we already have a check in flight, so no need + return; + } + mCompleted = false; mCurrentMonitor = null; + mStartTime = SystemClock.uptimeMillis(); mHandler.postAtFrontOfQueue(this); } - public boolean isCompletedLocked() { - return mCompleted; + public boolean isOverdueLocked() { + return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + 
mWaitMax); + } + + public int getCompletionStateLocked() { + if (mCompleted) { + return COMPLETED; + } else { + long latency = SystemClock.uptimeMillis() - mStartTime; + if (latency < mWaitMax/2) { + return WAITING; + } else if (latency < mWaitMax) { + return WAITED_HALF; + } + } + return OVERDUE; } public Thread getThread() { @@ -186,16 +218,19 @@ public class Watchdog extends Thread { // The shared foreground thread is the main checker. It is where we // will also dispatch monitor checks and do other work. - mMonitorChecker = new HandlerChecker(FgThread.getHandler(), "foreground thread"); + mMonitorChecker = new HandlerChecker(FgThread.getHandler(), + "foreground thread", DEFAULT_TIMEOUT); mHandlerCheckers.add(mMonitorChecker); // Add checker for main thread. We only do a quick check since there // can be UI running on the thread. mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()), - "main thread")); + "main thread", DEFAULT_TIMEOUT)); // Add checker for shared UI thread. - mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(), "ui thread")); + mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(), + "ui thread", DEFAULT_TIMEOUT)); // And also check IO thread. 
- mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(), "i/o thread")); + mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(), + "i/o thread", DEFAULT_TIMEOUT)); } public void init(Context context, BatteryService battery, @@ -242,11 +277,15 @@ public class Watchdog extends Thread { } public void addThread(Handler thread, String name) { + addThread(thread, name, DEFAULT_TIMEOUT); + } + + public void addThread(Handler thread, String name, long timeoutMillis) { synchronized (this) { if (isAlive()) { throw new RuntimeException("Threads can't be added once the Watchdog is running"); } - mHandlerCheckers.add(new HandlerChecker(thread, name)); + mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis)); } } @@ -259,21 +298,20 @@ public class Watchdog extends Thread { pms.reboot(false, reason, false); } - private boolean haveAllCheckersCompletedLocked() { + private int evaluateCheckerCompletionLocked() { + int state = COMPLETED; for (int i=0; i<mHandlerCheckers.size(); i++) { HandlerChecker hc = mHandlerCheckers.get(i); - if (!hc.isCompletedLocked()) { - return false; - } + state = Math.max(state, hc.getCompletionStateLocked()); } - return true; + return state; } private ArrayList<HandlerChecker> getBlockedCheckersLocked() { ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>(); for (int i=0; i<mHandlerCheckers.size(); i++) { HandlerChecker hc = mHandlerCheckers.get(i); - if (!hc.isCompletedLocked()) { + if (hc.isOverdueLocked()) { checkers.add(hc); } } @@ -299,14 +337,12 @@ public class Watchdog extends Thread { final String subject; final boolean allowRestart; synchronized (this) { - long timeout = TIME_TO_WAIT; - if (!waitedHalf) { - // If we are not at the half-point of waiting, perform a - // new set of checks. Otherwise we are still waiting for a previous set. 
- for (int i=0; i<mHandlerCheckers.size(); i++) { - HandlerChecker hc = mHandlerCheckers.get(i); - hc.scheduleCheckLocked(); - } + long timeout = CHECK_INTERVAL; + // Make sure we (re)spin the checkers that have become idle within + // this wait-and-check interval + for (int i=0; i<mHandlerCheckers.size(); i++) { + HandlerChecker hc = mHandlerCheckers.get(i); + hc.scheduleCheckLocked(); } // NOTE: We use uptimeMillis() here because we do not want to increment the time we @@ -320,26 +356,31 @@ public class Watchdog extends Thread { } catch (InterruptedException e) { Log.wtf(TAG, e); } - timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start); + timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start); } - if (haveAllCheckersCompletedLocked()) { - // The monitors have returned. + final int waitState = evaluateCheckerCompletionLocked(); + if (waitState == COMPLETED) { + // The monitors have returned; reset waitedHalf = false; continue; - } - - if (!waitedHalf) { - // We've waited half the deadlock-detection interval. Pull a stack - // trace and wait another half. - ArrayList<Integer> pids = new ArrayList<Integer>(); - pids.add(Process.myPid()); - ActivityManagerService.dumpStackTraces(true, pids, null, null, - NATIVE_STACKS_OF_INTEREST); - waitedHalf = true; + } else if (waitState == WAITING) { + // still waiting but within their configured intervals; back off and recheck + continue; + } else if (waitState == WAITED_HALF) { + if (!waitedHalf) { + // We've waited half the deadlock-detection interval. Pull a stack + // trace and wait another half. + ArrayList<Integer> pids = new ArrayList<Integer>(); + pids.add(Process.myPid()); + ActivityManagerService.dumpStackTraces(true, pids, null, null, + NATIVE_STACKS_OF_INTEREST); + waitedHalf = true; + } continue; } + // something is overdue! blockedCheckers = getBlockedCheckersLocked(); subject = describeCheckersLocked(blockedCheckers); allowRestart = mAllowRestart; |