summaryrefslogtreecommitdiffstats
path: root/services/java/com/android/server/Watchdog.java
diff options
context:
space:
mode:
authorChristopher Tate <ctate@google.com>2013-10-23 17:28:27 -0700
committerChristopher Tate <ctate@google.com>2013-10-24 10:46:28 -0700
commite6f81cf1f69e0683f969238f921950befba8e6c3 (patch)
treed845ebcc18cc2e941e33c47b0e37a0ad116cb58f /services/java/com/android/server/Watchdog.java
parentd9e98a4b6d3181ab3d58b7781ee2f88a389ecf1f (diff)
downloadframeworks_base-e6f81cf1f69e0683f969238f921950befba8e6c3.zip
frameworks_base-e6f81cf1f69e0683f969238f921950befba8e6c3.tar.gz
frameworks_base-e6f81cf1f69e0683f969238f921950befba8e6c3.tar.bz2
Support different watchdog timeouts for different entities
We need to be able to perform very lengthy operations on some threads (e.g. the I/O thread responsible for installing multi-gigabyte APKs) but still have long-run deadlock/hang detection applied to those threads. Previously the watchdog mechanism applied the same policy to all monitored threads: unresponsive after 60 seconds => restart the system. Now, each monitored entity can have its own independent timeout after which the watchdog declares deadlock and restarts the runtime. The halfway-finished intermediate thread stacks are dumped based on the specific entity's declared timeout, not the global 30 second checking interval. With that new mechanism in place, the Package Manager's lengthy-I/O thread watchdog timeout is raised to 10 minutes. Bug 11278188 Change-Id: I512599260009c31416b2385f778681e5b9597f05
Diffstat (limited to 'services/java/com/android/server/Watchdog.java')
-rw-r--r--services/java/com/android/server/Watchdog.java113
1 files changed, 77 insertions, 36 deletions
diff --git a/services/java/com/android/server/Watchdog.java b/services/java/com/android/server/Watchdog.java
index 616090e..8054788 100644
--- a/services/java/com/android/server/Watchdog.java
+++ b/services/java/com/android/server/Watchdog.java
@@ -58,7 +58,14 @@ public class Watchdog extends Thread {
// Set this to true to have the watchdog record kernel thread stacks when it fires
static final boolean RECORD_KERNEL_THREADS = true;
- static final int TIME_TO_WAIT = DB ? 5*1000 : 30*1000;
+ static final long DEFAULT_TIMEOUT = DB ? 10*1000 : 60*1000;
+ static final long CHECK_INTERVAL = DEFAULT_TIMEOUT / 2;
+
+ // These are temporally ordered: larger values as lateness increases
+ static final int COMPLETED = 0;
+ static final int WAITING = 1;
+ static final int WAITED_HALF = 2;
+ static final int OVERDUE = 3;
static final String[] NATIVE_STACKS_OF_INTEREST = new String[] {
"/system/bin/mediaserver",
@@ -87,13 +94,17 @@ public class Watchdog extends Thread {
public final class HandlerChecker implements Runnable {
private final Handler mHandler;
private final String mName;
+ private final long mWaitMax;
private final ArrayList<Monitor> mMonitors = new ArrayList<Monitor>();
private boolean mCompleted;
private Monitor mCurrentMonitor;
+ private long mStartTime;
- HandlerChecker(Handler handler, String name) {
+ HandlerChecker(Handler handler, String name, long waitMaxMillis) {
mHandler = handler;
mName = name;
+ mWaitMax = waitMaxMillis;
+ mCompleted = true;
}
public void addMonitor(Monitor monitor) {
@@ -111,13 +122,34 @@ public class Watchdog extends Thread {
mCompleted = true;
return;
}
+
+ if (!mCompleted) {
+ // we already have a check in flight, so no need
+ return;
+ }
+
mCompleted = false;
mCurrentMonitor = null;
+ mStartTime = SystemClock.uptimeMillis();
mHandler.postAtFrontOfQueue(this);
}
- public boolean isCompletedLocked() {
- return mCompleted;
+ public boolean isOverdueLocked() {
+ return (!mCompleted) && (SystemClock.uptimeMillis() > mStartTime + mWaitMax);
+ }
+
+ public int getCompletionStateLocked() {
+ if (mCompleted) {
+ return COMPLETED;
+ } else {
+ long latency = SystemClock.uptimeMillis() - mStartTime;
+ if (latency < mWaitMax/2) {
+ return WAITING;
+ } else if (latency < mWaitMax) {
+ return WAITED_HALF;
+ }
+ }
+ return OVERDUE;
}
public Thread getThread() {
@@ -186,16 +218,19 @@ public class Watchdog extends Thread {
// The shared foreground thread is the main checker. It is where we
// will also dispatch monitor checks and do other work.
- mMonitorChecker = new HandlerChecker(FgThread.getHandler(), "foreground thread");
+ mMonitorChecker = new HandlerChecker(FgThread.getHandler(),
+ "foreground thread", DEFAULT_TIMEOUT);
mHandlerCheckers.add(mMonitorChecker);
// Add checker for main thread. We only do a quick check since there
// can be UI running on the thread.
mHandlerCheckers.add(new HandlerChecker(new Handler(Looper.getMainLooper()),
- "main thread"));
+ "main thread", DEFAULT_TIMEOUT));
// Add checker for shared UI thread.
- mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(), "ui thread"));
+ mHandlerCheckers.add(new HandlerChecker(UiThread.getHandler(),
+ "ui thread", DEFAULT_TIMEOUT));
// And also check IO thread.
- mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(), "i/o thread"));
+ mHandlerCheckers.add(new HandlerChecker(IoThread.getHandler(),
+ "i/o thread", DEFAULT_TIMEOUT));
}
public void init(Context context, BatteryService battery,
@@ -242,11 +277,15 @@ public class Watchdog extends Thread {
}
public void addThread(Handler thread, String name) {
+ addThread(thread, name, DEFAULT_TIMEOUT);
+ }
+
+ public void addThread(Handler thread, String name, long timeoutMillis) {
synchronized (this) {
if (isAlive()) {
throw new RuntimeException("Threads can't be added once the Watchdog is running");
}
- mHandlerCheckers.add(new HandlerChecker(thread, name));
+ mHandlerCheckers.add(new HandlerChecker(thread, name, timeoutMillis));
}
}
@@ -259,21 +298,20 @@ public class Watchdog extends Thread {
pms.reboot(false, reason, false);
}
- private boolean haveAllCheckersCompletedLocked() {
+ private int evaluateCheckerCompletionLocked() {
+ int state = COMPLETED;
for (int i=0; i<mHandlerCheckers.size(); i++) {
HandlerChecker hc = mHandlerCheckers.get(i);
- if (!hc.isCompletedLocked()) {
- return false;
- }
+ state = Math.max(state, hc.getCompletionStateLocked());
}
- return true;
+ return state;
}
private ArrayList<HandlerChecker> getBlockedCheckersLocked() {
ArrayList<HandlerChecker> checkers = new ArrayList<HandlerChecker>();
for (int i=0; i<mHandlerCheckers.size(); i++) {
HandlerChecker hc = mHandlerCheckers.get(i);
- if (!hc.isCompletedLocked()) {
+ if (hc.isOverdueLocked()) {
checkers.add(hc);
}
}
@@ -299,14 +337,12 @@ public class Watchdog extends Thread {
final String subject;
final boolean allowRestart;
synchronized (this) {
- long timeout = TIME_TO_WAIT;
- if (!waitedHalf) {
- // If we are not at the half-point of waiting, perform a
- // new set of checks. Otherwise we are still waiting for a previous set.
- for (int i=0; i<mHandlerCheckers.size(); i++) {
- HandlerChecker hc = mHandlerCheckers.get(i);
- hc.scheduleCheckLocked();
- }
+ long timeout = CHECK_INTERVAL;
+ // Make sure we (re)spin the checkers that have become idle within
+ // this wait-and-check interval
+ for (int i=0; i<mHandlerCheckers.size(); i++) {
+ HandlerChecker hc = mHandlerCheckers.get(i);
+ hc.scheduleCheckLocked();
}
// NOTE: We use uptimeMillis() here because we do not want to increment the time we
@@ -320,26 +356,31 @@ public class Watchdog extends Thread {
} catch (InterruptedException e) {
Log.wtf(TAG, e);
}
- timeout = TIME_TO_WAIT - (SystemClock.uptimeMillis() - start);
+ timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start);
}
- if (haveAllCheckersCompletedLocked()) {
- // The monitors have returned.
+ final int waitState = evaluateCheckerCompletionLocked();
+ if (waitState == COMPLETED) {
+ // The monitors have returned; reset
waitedHalf = false;
continue;
- }
-
- if (!waitedHalf) {
- // We've waited half the deadlock-detection interval. Pull a stack
- // trace and wait another half.
- ArrayList<Integer> pids = new ArrayList<Integer>();
- pids.add(Process.myPid());
- ActivityManagerService.dumpStackTraces(true, pids, null, null,
- NATIVE_STACKS_OF_INTEREST);
- waitedHalf = true;
+ } else if (waitState == WAITING) {
+ // still waiting but within their configured intervals; back off and recheck
+ continue;
+ } else if (waitState == WAITED_HALF) {
+ if (!waitedHalf) {
+ // We've waited half the deadlock-detection interval. Pull a stack
+ // trace and wait another half.
+ ArrayList<Integer> pids = new ArrayList<Integer>();
+ pids.add(Process.myPid());
+ ActivityManagerService.dumpStackTraces(true, pids, null, null,
+ NATIVE_STACKS_OF_INTEREST);
+ waitedHalf = true;
+ }
continue;
}
+ // something is overdue!
blockedCheckers = getBlockedCheckersLocked();
subject = describeCheckersLocked(blockedCheckers);
allowRestart = mAllowRestart;