Re: [PATCH 2/2] sched_ext: Dump the stall CPU first in watchdog exit

From: Andrea Righi

Date: Thu Apr 09 2026 - 01:45:07 EST


Hi Changwoo,

On Wed, Apr 08, 2026 at 12:11:13PM +0900, Changwoo Min wrote:
> When a watchdog timeout fires, the CPU where the stalled task was
> running is the most relevant piece of information for diagnosing the
> hang. However, if there are many CPUs, the dump can get truncated and
> the stall CPU's information may not appear in the output.
>
> Add a stall_cpu field to scx_exit_info, thread it through scx_vexit()
> and __scx_exit(), and populate it from cpu_of(rq) in
> check_rq_for_timeouts(). In scx_dump_state(), dump the stall CPU
> before iterating the rest so it always appears at the top of the output.
>
> Introduce a scx_exit() macro that wraps __scx_exit() with stall_cpu=0
> for all non-stall exit paths, keeping call sites unchanged.

Should we use stall_cpu = -1 as a sentinel to represent "no stall"?

>
> Signed-off-by: Changwoo Min <changwoo@xxxxxxxxxx>
> ---
> kernel/sched/ext.c | 31 ++++++++++++++++++++-----------
> kernel/sched/ext_internal.h | 3 +++
> 2 files changed, 23 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8f7d5c1556be..671a1713aedb 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -200,24 +200,28 @@ static bool task_dead_and_done(struct task_struct *p);
> static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags);
> static void scx_disable(struct scx_sched *sch, enum scx_exit_kind kind);
> static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
> - s64 exit_code, const char *fmt, va_list args);
> + s64 exit_code, int stall_cpu, const char *fmt,
> + va_list args);
>
> -static __printf(4, 5) bool scx_exit(struct scx_sched *sch,
> - enum scx_exit_kind kind, s64 exit_code,
> - const char *fmt, ...)
> +static __printf(5, 6) bool __scx_exit(struct scx_sched *sch,
> + enum scx_exit_kind kind, s64 exit_code,
> + int stall_cpu, const char *fmt, ...)
> {
> va_list args;
> bool ret;
>
> va_start(args, fmt);
> - ret = scx_vexit(sch, kind, exit_code, fmt, args);
> + ret = scx_vexit(sch, kind, exit_code, stall_cpu, fmt, args);
> va_end(args);
>
> return ret;
> }
>
> +#define scx_exit(sch, kind, exit_code, fmt, args...) \
> + __scx_exit(sch, kind, exit_code, 0, fmt, ##args)
> +
> #define scx_error(sch, fmt, args...) scx_exit((sch), SCX_EXIT_ERROR, 0, fmt, ##args)
> -#define scx_verror(sch, fmt, args) scx_vexit((sch), SCX_EXIT_ERROR, 0, fmt, args)
> +#define scx_verror(sch, fmt, args) scx_vexit((sch), SCX_EXIT_ERROR, 0, 0, fmt, args)
>
> #define SCX_HAS_OP(sch, op) test_bit(SCX_OP_IDX(op), (sch)->has_op)
>
> @@ -3433,9 +3437,10 @@ static bool check_rq_for_timeouts(struct rq *rq)
> last_runnable + READ_ONCE(sch->watchdog_timeout)))) {
> u32 dur_ms = jiffies_to_msecs(jiffies - last_runnable);
>
> - scx_exit(sch, SCX_EXIT_ERROR_STALL, 0,
> - "%s[%d] failed to run for %u.%03us",
> - p->comm, p->pid, dur_ms / 1000, dur_ms % 1000);
> + __scx_exit(sch, SCX_EXIT_ERROR_STALL, 0, cpu_of(rq),
> + "%s[%d] failed to run for %u.%03us",
> + p->comm, p->pid, dur_ms / 1000,
> + dur_ms % 1000);
> timed_out = true;
> break;
> }
> @@ -6337,8 +6342,11 @@ static void scx_dump_state(struct scx_sched *sch, struct scx_exit_info *ei,
> dump_line(&s, "CPU states");
> dump_line(&s, "----------");
>
> + /* Dump the stall CPU first, then dump the rest in order. */
> + scx_dump_cpu(sch, &s, &dctx, ei->stall_cpu, dump_all_tasks);

And here we can skip this if ei->stall_cpu < 0.

> for_each_possible_cpu(cpu) {
> - scx_dump_cpu(sch, &s, &dctx, cpu, dump_all_tasks);
> + if (cpu != ei->stall_cpu)
> + scx_dump_cpu(sch, &s, &dctx, cpu, dump_all_tasks);
> }
>
> dump_newline(&s);
> @@ -6377,7 +6385,7 @@ static void scx_disable_irq_workfn(struct irq_work *irq_work)
> }
>
> static bool scx_vexit(struct scx_sched *sch,
> - enum scx_exit_kind kind, s64 exit_code,
> + enum scx_exit_kind kind, s64 exit_code, int stall_cpu,
> const char *fmt, va_list args)
> {
> struct scx_exit_info *ei = sch->exit_info;
> @@ -6400,6 +6408,7 @@ static bool scx_vexit(struct scx_sched *sch,
> */
> ei->kind = kind;
> ei->reason = scx_exit_reason(ei->kind);
> + ei->stall_cpu = stall_cpu;
>
> irq_work_queue(&sch->disable_irq_work);
> return true;
> diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
> index b4f36d8b9c1d..a0a09e8f2ac2 100644
> --- a/kernel/sched/ext_internal.h
> +++ b/kernel/sched/ext_internal.h
> @@ -93,6 +93,9 @@ struct scx_exit_info {
> /* %SCX_EXIT_* - broad category of the exit reason */
> enum scx_exit_kind kind;
>
> + /* CPU where a task stall happened. */
> + int stall_cpu;
> +

With CO-RE we shouldn't have any compatibility issue, but would it make sense to
move this at the end of the struct anyway?

> /* exit code if gracefully exiting */
> s64 exit_code;
>
> --
> 2.53.0
>

Thanks,
-Andrea