16 #ifndef hifi_DeadlockWatchdog_h
17 #define hifi_DeadlockWatchdog_h
21 #include <NumericalConstants.h>
22 #include <SharedUtil.h>
23 #include <crash-handler/CrashHandler.h>
25 #include "InterfaceLogging.h"
26 #include "SimpleMovingAverage.h"
// Watchdog thread type derived from QThread. The constants below tune its heartbeat checks.
28 class DeadlockWatchdogThread :
public QThread {
// Value passed to QThread::sleep() between successive heartbeat checks (seconds).
30 static const unsigned long HEARTBEAT_UPDATE_INTERVAL_SECS = 1;
// Heartbeat age above which the check logs "DEADLOCK DETECTED" and calls deadlockDetectionCrash().
// Presumably 120 seconds expressed in microseconds -- USECS_PER_SECOND is defined elsewhere.
31 static const unsigned long MAX_HEARTBEAT_AGE_USECS = 120 * USECS_PER_SECOND;
// Threshold for the moving average above which an "OVER EXPECTED VALUE" warning is logged.
32 static const int WARNING_ELAPSED_HEARTBEAT = 500 * USECS_PER_MSEC;
// Template argument handed to ThreadSafeMovingAverage for the heartbeat-interval average.
33 static const int HEARTBEAT_SAMPLES = 100000;
// Constructor: names the thread object, seeds the heartbeat with the current timestamp,
// and connects a handler to the application's aboutToQuit signal.
36 DeadlockWatchdogThread() {
37 setObjectName(
"Deadlock Watchdog");
// Give the heartbeat an initial value so the first age computation has a baseline.
39 _heartbeat = usecTimestampNow();
// NOTE(review): the handler body is not visible in this excerpt; presumably it sets _quit -- confirm.
41 connect(qApp, &QCoreApplication::aboutToQuit, [
this] {
// Stores the given thread handle; deadlockDetectionCrash() later writes it into a crash annotation.
46 void setMainThreadID(Qt::HANDLE threadID) {
47 _mainThreadID = threadID;
// Measures the time since the stored heartbeat value and feeds that interval into the moving average.
50 static void updateHeartbeat() {
51 auto now = usecTimestampNow();
// Interval between the stored heartbeat and the current timestamp.
52 auto elapsed = now - _heartbeat;
// NOTE(review): _movingAverage is declared with int samples, so a wider `elapsed` would be
// narrowed here -- confirm the types involved.
53 _movingAverage.addSample(elapsed);
// NOTE(review): a store of `now` back into _heartbeat is not visible in this excerpt -- confirm it follows.
// Adds two annotations to the crash handler (main thread ID and a deadlock marker),
// then writes through a null pointer to force a crash.
57 void deadlockDetectionCrash() {
58 auto &ch = CrashHandler::getInstance();
// Record the stored main-thread handle, converted to an integer and then to a decimal string.
60 ch.setAnnotation(
"_mod_faulting_tid", std::to_string((uint64_t)_mainThreadID));
// Mark this report as coming from the deadlock watchdog.
61 ch.setAnnotation(
"deadlock",
"1");
// Intentional null-pointer write used as the crash trigger.
// NOTE(review): writing through a non-volatile null pointer is undefined behavior, so an
// optimizer may drop the store -- consider making the pointer volatile.
62 uint32_t* crashTrigger =
nullptr;
63 *crashTrigger = 0xDEAD10CC;
// Static helper taking a callable by const reference.
// NOTE(review): the body is not visible in this excerpt; judging by the name it presumably
// invokes `lambda` while the watchdog checks are paused -- confirm.
66 static void withPause(
const std::function<
void()>& lambda) {
// NOTE(review): the body is not visible in this excerpt; presumably it clears _paused so that
// heartbeat checks continue -- confirm.
75 static void resume() {
// Periodic heartbeat check. NOTE(review): the enclosing function header and loop are not
// visible in this excerpt; presumably this is the body of the thread's run() loop -- confirm.
// Wait one check interval before inspecting the heartbeat.
83 QThread::sleep(HEARTBEAT_UPDATE_INTERVAL_SECS);
// Snapshot the heartbeat once so every computation and log line below uses the same value.
88 uint64_t lastHeartbeat = _heartbeat;
89 uint64_t now = usecTimestampNow();
// Age of the heartbeat, clamped to 0 so the unsigned subtraction cannot wrap when the
// snapshot is not older than `now`.
90 auto lastHeartbeatAge = (now > lastHeartbeat) ? now - lastHeartbeat : 0;
91 auto elapsedMovingAverage = _movingAverage.getAverage();
// Log and record a new maximum average only when it exceeds the previous maximum by more than 10%.
93 if (elapsedMovingAverage > _maxElapsedAverage * 1.1f) {
95 qCDebug(interfaceapp_deadlock) <<
"DEADLOCK WATCHDOG WARNING:"
96 <<
"lastHeartbeatAge:" << lastHeartbeatAge
97 <<
"elapsedMovingAverage:" << elapsedMovingAverage
98 <<
"maxElapsed:" << _maxElapsed
99 <<
"PREVIOUS maxElapsedAverage:" << _maxElapsedAverage
100 <<
"NEW maxElapsedAverage:" << elapsedMovingAverage <<
"** NEW MAX ELAPSED AVERAGE **"
101 <<
"samples:" << _movingAverage.getSamples();
103 _maxElapsedAverage = elapsedMovingAverage;
// Log and record a new maximum whenever the current heartbeat age exceeds the previous maximum.
105 if (lastHeartbeatAge > _maxElapsed) {
107 qCDebug(interfaceapp_deadlock) <<
"DEADLOCK WATCHDOG WARNING:"
108 <<
"lastHeartbeatAge:" << lastHeartbeatAge
109 <<
"elapsedMovingAverage:" << elapsedMovingAverage
110 <<
"PREVIOUS maxElapsed:" << _maxElapsed
111 <<
"NEW maxElapsed:" << lastHeartbeatAge <<
"** NEW MAX ELAPSED **"
112 <<
"maxElapsedAverage:" << _maxElapsedAverage
113 <<
"samples:" << _movingAverage.getSamples();
115 _maxElapsed = lastHeartbeatAge;
// Warn when the average interval between heartbeats is above the expected threshold.
119 if (elapsedMovingAverage > WARNING_ELAPSED_HEARTBEAT) {
120 qCDebug(interfaceapp_deadlock) <<
"DEADLOCK WATCHDOG WARNING:"
121 <<
"lastHeartbeatAge:" << lastHeartbeatAge
122 <<
"elapsedMovingAverage:" << elapsedMovingAverage <<
"** OVER EXPECTED VALUE **"
123 <<
"maxElapsed:" << _maxElapsed
124 <<
"maxElapsedAverage:" << _maxElapsedAverage
125 <<
"samples:" << _movingAverage.getSamples();
// Heartbeat older than the hard limit: log the full state, then crash deliberately.
129 if (lastHeartbeatAge > MAX_HEARTBEAT_AGE_USECS) {
130 qCDebug(interfaceapp_deadlock) <<
"DEADLOCK DETECTED -- "
131 <<
"lastHeartbeatAge:" << lastHeartbeatAge
132 <<
"[ lastHeartbeat :" << lastHeartbeat
133 <<
"now:" << now <<
" ]"
134 <<
"elapsedMovingAverage:" << elapsedMovingAverage
135 <<
"maxElapsed:" << _maxElapsed
136 <<
"maxElapsedAverage:" << _maxElapsedAverage
137 <<
"samples:" << _movingAverage.getSamples();
142 deadlockDetectionCrash();
// Pause flag. NOTE(review): its readers and writers are not visible in this excerpt.
148 static std::atomic<bool> _paused;
// Timestamp of the most recent heartbeat, taken from usecTimestampNow().
149 static std::atomic<uint64_t> _heartbeat;
// Largest heartbeat age recorded so far by the periodic check.
150 static std::atomic<uint64_t> _maxElapsed;
// Largest moving-average value recorded so far by the periodic check.
151 static std::atomic<int> _maxElapsedAverage;
// Moving average of the intervals between heartbeats, fed by updateHeartbeat().
152 static ThreadSafeMovingAverage<int, HEARTBEAT_SAMPLES> _movingAverage;
// NOTE(review): presumably set by the aboutToQuit handler to stop the watchdog; its use is
// not visible in this excerpt -- confirm.
154 bool _quit {
false };
// Main thread handle supplied through setMainThreadID(); null until that call is made.
156 Qt::HANDLE _mainThreadID =
nullptr;