diff --git a/include/haproxy/bug.h b/include/haproxy/bug.h index 556d8167e..e708bcedf 100644 --- a/include/haproxy/bug.h +++ b/include/haproxy/bug.h @@ -421,6 +421,7 @@ enum tainted_flags { TAINTED_LUA_STUCK = 0x00000400, /* stuck in a Lua context */ TAINTED_LUA_STUCK_SHARED = 0x00000800, /* stuck in a shared Lua context */ TAINTED_MEM_TRIMMING_STUCK = 0x00001000, /* stuck while trimming memory */ + TAINTED_WARN_BLOCKED_TRAFFIC = 0x00002000, /* emitted a warning about blocked traffic */ }; /* this is a bit field made of TAINTED_*, and is declared in haproxy.c */ diff --git a/src/debug.c b/src/debug.c index 4d79ef17c..3feff5f09 100644 --- a/src/debug.c +++ b/src/debug.c @@ -738,7 +738,7 @@ void ha_stuck_warning(int thr) struct buffer buf; ullong n, p; - if (get_tainted() & TAINTED_PANIC) { + if (mark_tainted(TAINTED_WARN_BLOCKED_TRAFFIC) & TAINTED_PANIC) { /* a panic dump is already in progress, let's not disturb it, * we'll be called via signal DEBUGSIG. By returning we may be * able to leave a current signal handler (e.g. WDT) so that diff --git a/src/wdt.c b/src/wdt.c index fd07d7fb7..a28923b33 100644 --- a/src/wdt.c +++ b/src/wdt.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,7 @@ */ static struct { timer_t timer; + uint prev_ctxsw; } per_thread_wd_ctx[MAX_THREADS]; /* Setup (or ping) the watchdog timer for thread . Returns non-zero on @@ -106,10 +108,18 @@ void wdt_handler(int sig, siginfo_t *si, void *arg) * scheduler is still alive by setting the TH_FL_STUCK flag * that the scheduler clears when switching to the next task. * If it's already set, then it's our second call with no - * progress and the thread is dead. + * progress and the thread is dead. However, if we figure + * that the scheduler made no progress since last time, we'll + * at least emit a warning. 
*/
 		if (!(_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].flags) & TH_FL_STUCK)) {
+			uint curr_ctxsw; /* snapshot of activity[thr].ctxsw taken by this call */
+
 			_HA_ATOMIC_OR(&ha_thread_ctx[thr].flags, TH_FL_STUCK);
+			curr_ctxsw = HA_ATOMIC_LOAD(&activity[thr].ctxsw);
+			if (curr_ctxsw == HA_ATOMIC_LOAD(&per_thread_wd_ctx[thr].prev_ctxsw))
+				ha_stuck_warning(thr); /* counter unchanged since the value saved by the previous call */
+			HA_ATOMIC_STORE(&per_thread_wd_ctx[thr].prev_ctxsw, curr_ctxsw); /* save for the next call; never write back to activity[] */
 			goto update_and_leave;
 		}