From 3da009b2504099c7ef50fd76ca78176cd927baed Mon Sep 17 00:00:00 2001 From: Ashay Jaiswal Date: Fri, 17 Nov 2023 10:41:28 +0530 Subject: [PATCH 1/2] sched: walt: crash system if UTRA is called without runqueue lock "update_task_ravg" is expected to be called with runqueue lock held by the caller, induce a panic if UTRA is called for a runqueue without it being locked. Also, add an early runqueue lock check in "__walt_irq_work_locked" to dump debug data and induce system crash. Change-Id: Ie7bb29a7845004b3c64541b32295bb61bbe32aba Signed-off-by: Ashay Jaiswal Signed-off-by: Shaleen Agrawal --- kernel/sched/walt/walt.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/sched/walt/walt.c b/kernel/sched/walt/walt.c index ac0e325346d4..03160e4eb13e 100644 --- a/kernel/sched/walt/walt.c +++ b/kernel/sched/walt/walt.c @@ -2359,6 +2359,14 @@ static void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int even if (!wrq->window_start || wts->mark_start == wallclock) return; + if (unlikely(!raw_spin_is_locked(&rq->__lock))) { + printk_deferred("WALT-BUG CPU%d: %s task %s(%d) unlocked access for cpu=%d suspende=%d last_clk=%llu stack[%pS <== %pS <== %pS]\n", + raw_smp_processor_id(), __func__, p->comm, p->pid, rq->cpu, + walt_clock_suspended, sched_clock_last, + (void *)CALLER_ADDR0, (void *)CALLER_ADDR1, (void *)CALLER_ADDR2); + WALT_PANIC(1); + } + walt_lockdep_assert_rq(rq, p); old_window_start = update_window_start(rq, wallclock, event); @@ -4270,6 +4278,15 @@ static inline void __walt_irq_work_locked(bool is_migration, bool is_asym_migrat if (rq->curr) { /* only update ravg for locked cpus */ if (cpumask_intersects(lock_cpus, &cluster->cpus)) { + if (unlikely(!raw_spin_is_locked(&rq->__lock))) { + printk_deferred("WALT-BUG %s unlocked cpu=%d is_migration=%d is_asym_migration=%d is_shared_rail_migration=%d lock_cpus=%*pbl suspended=%d last_clk=%llu stack[%pS <= %pS <= %pS]\n", + __func__, rq->cpu, is_migration, is_asym_migration, + 
is_shared_rail_migration, + cpumask_pr_args(lock_cpus), walt_clock_suspended, + sched_clock_last, (void *)CALLER_ADDR0, + (void *)CALLER_ADDR1, (void *)CALLER_ADDR2); + WALT_PANIC(1); + } walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wc, 0); account_load_subtractions(rq); From 427da3d77b297accbf7cf197d755e811ba5be191 Mon Sep 17 00:00:00 2001 From: Shaleen Agrawal Date: Wed, 22 Nov 2023 13:04:58 -0800 Subject: [PATCH 2/2] sched/walt: Add fallback plan in WALT_BUGs In the event that userspace has disabled panicking on WALT_BUGs, ensure the system can continue to operate in a relatively sane manner by resetting appropriate values. For example, if the scheduler would otherwise crash because time appears to have gone backwards, reset the clock to the latest recorded value. Change-Id: I6e26c71e9185b2e7704be3a6a519b48301d4a2a3 Signed-off-by: Shaleen Agrawal --- kernel/sched/walt/walt.c | 48 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/kernel/sched/walt/walt.c b/kernel/sched/walt/walt.c index 03160e4eb13e..f129112b9415 100644 --- a/kernel/sched/walt/walt.c +++ b/kernel/sched/walt/walt.c @@ -419,17 +419,20 @@ update_window_start(struct rq *rq, u64 wallclock, int event) bool full_window; if (wallclock < wrq->latest_clock) { - printk_deferred("WALT-BUG CPU%d; wallclock=%llu(0x%llx) is lesser than latest_clock=%llu(0x%llx)", - rq->cpu, wallclock, wallclock, wrq->latest_clock, - wrq->latest_clock); - WALT_PANIC(1); + WALT_BUG(WALT_BUG_WALT, NULL, + "on CPU%d; wallclock=%llu(0x%llx) is lesser than latest_clock=%llu(0x%llx)", + rq->cpu, wallclock, wallclock, wrq->latest_clock, + wrq->latest_clock); + wallclock = wrq->latest_clock; } delta = wallclock - wrq->window_start; if (delta < 0) { - printk_deferred("WALT-BUG CPU%d; wallclock=%llu(0x%llx) is lesser than window_start=%llu(0x%llx)", - rq->cpu, wallclock, wallclock, - wrq->window_start, wrq->window_start); - WALT_PANIC(1); + WALT_BUG(WALT_BUG_WALT, NULL, + " on CPU%d; 
wallclock=%llu(0x%llx) is lesser than window_start=%llu(0x%llx)", + rq->cpu, wallclock, wallclock, + wrq->window_start, wrq->window_start); + delta = 0; + wallclock = wrq->window_start; } wrq->latest_clock = wallclock; if (delta < sched_ravg_window) @@ -2314,10 +2317,11 @@ update_task_rq_cpu_cycles(struct task_struct *p, struct rq *rq, int event, time_delta = wallclock - wts->mark_start; if ((s64)time_delta < 0) { - printk_deferred("WALT-BUG pid=%u CPU%d wallclock=%llu(0x%llx) < mark_start=%llu(0x%llx) event=%d irqtime=%llu", - p->pid, rq->cpu, wallclock, wallclock, - wts->mark_start, wts->mark_start, event, irqtime); - WALT_PANIC((s64)time_delta < 0); + WALT_BUG(WALT_BUG_WALT, p, + "WALT-BUG pid=%u CPU%d wallclock=%llu(0x%llx) < mark_start=%llu(0x%llx) event=%d irqtime=%llu", + p->pid, rq->cpu, wallclock, wallclock, + wts->mark_start, wts->mark_start, event, irqtime); + time_delta = 1; } wrq->task_exec_scale = DIV64_U64_ROUNDUP(cycles_delta * @@ -2359,13 +2363,12 @@ static void walt_update_task_ravg(struct task_struct *p, struct rq *rq, int even if (!wrq->window_start || wts->mark_start == wallclock) return; - if (unlikely(!raw_spin_is_locked(&rq->__lock))) { - printk_deferred("WALT-BUG CPU%d: %s task %s(%d) unlocked access for cpu=%d suspende=%d last_clk=%llu stack[%pS <== %pS <== %pS]\n", - raw_smp_processor_id(), __func__, p->comm, p->pid, rq->cpu, - walt_clock_suspended, sched_clock_last, - (void *)CALLER_ADDR0, (void *)CALLER_ADDR1, (void *)CALLER_ADDR2); - WALT_PANIC(1); - } + if (unlikely(!raw_spin_is_locked(&rq->__lock))) + WALT_BUG(WALT_BUG_WALT, p, + "on CPU%d: %s task %s(%d) unlocked access for cpu=%d suspende=%d last_clk=%llu stack[%pS <== %pS <== %pS]\n", + raw_smp_processor_id(), __func__, p->comm, p->pid, rq->cpu, + walt_clock_suspended, sched_clock_last, + (void *)CALLER_ADDR0, (void *)CALLER_ADDR1, (void *)CALLER_ADDR2); walt_lockdep_assert_rq(rq, p); @@ -4278,15 +4281,14 @@ static inline void __walt_irq_work_locked(bool is_migration, bool 
is_asym_migrat if (rq->curr) { /* only update ravg for locked cpus */ if (cpumask_intersects(lock_cpus, &cluster->cpus)) { - if (unlikely(!raw_spin_is_locked(&rq->__lock))) { - printk_deferred("WALT-BUG %s unlocked cpu=%d is_migration=%d is_asym_migration=%d is_shared_rail_migration=%d lock_cpus=%*pbl suspended=%d last_clk=%llu stack[%pS <= %pS <= %pS]\n", + if (unlikely(!raw_spin_is_locked(&rq->__lock))) + WALT_BUG(WALT_BUG_WALT, NULL, + "WALT-BUG %s unlocked cpu=%d is_migration=%d is_asym_migration=%d is_shared_rail_migration=%d lock_cpus=%*pbl suspended=%d last_clk=%llu stack[%pS <= %pS <= %pS]\n", __func__, rq->cpu, is_migration, is_asym_migration, is_shared_rail_migration, cpumask_pr_args(lock_cpus), walt_clock_suspended, sched_clock_last, (void *)CALLER_ADDR0, (void *)CALLER_ADDR1, (void *)CALLER_ADDR2); - WALT_PANIC(1); - } walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, wc, 0); account_load_subtractions(rq);