soc: qcom: Print irq count only when panic happen

Print the top 10 irq count in watchdog pet handler doesn't help
too much in most irq storm cases. So use a new method to record and
compute the irq count delta, then show the top 5 irqs when watchdog
bark or panic to help solve irq storm problems.

Change-Id: I7bae2bf1bf4fc15c1874eebc8661b9d8c4600229
Signed-off-by: Huang Yiwei <quic_hyiwei@quicinc.com>
This commit is contained in:
Huang Yiwei 2023-02-08 13:35:34 +08:00
parent 5bdbd9edab
commit b1807b46e0
4 changed files with 62 additions and 209 deletions

View file

@ -130,7 +130,6 @@ CONFIG_QCOM_IOMMU_DEBUG=m
CONFIG_QCOM_IOMMU_UTIL=m
# CONFIG_QCOM_IPA is not set
CONFIG_QCOM_IPCC=m
CONFIG_QCOM_IRQ_STAT=y
CONFIG_QCOM_LAZY_MAPPING=m
CONFIG_QCOM_LLCC=m
CONFIG_QCOM_LLCC_PERFMON=m

View file

@ -546,15 +546,6 @@ config QCOM_SOC_WATCHDOG
utilize this the Qualcomm Technologies, Inc. watchdog framework must
also be enabled.
config QCOM_IRQ_STAT
bool "QCOM IRQ stats"
depends on QCOM_WDT_CORE
help
This give irq stats for top hitter at
watchdog pet, watchdog bark and kernel panics.
This provides additional debug information
for irq counts on cpu and ipi counts.
config QCOM_FORCE_WDOG_BITE_ON_PANIC
bool "QCOM force watchdog bite on panic"
depends on QCOM_WDT_CORE

View file

@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
* Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved.
* Copyright (c) 2021-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#include <linux/irqdomain.h>
#include <linux/delay.h>
@ -36,9 +36,7 @@
#include <linux/notifier.h>
#define MASK_SIZE 32
#define COMPARE_RET -1
typedef int (*compare_t) (const void *lhs, const void *rhs);
static struct msm_watchdog_data *wdog_data;
static void qcom_wdt_dump_cpu_alive_mask(struct msm_watchdog_data *wdog_dd)
@ -51,200 +49,88 @@ static void qcom_wdt_dump_cpu_alive_mask(struct msm_watchdog_data *wdog_dd)
alive_mask_buf);
}
#ifdef CONFIG_QCOM_IRQ_STAT
static int cmp_irq_info_fn(const void *a, const void *b)
{
struct qcom_irq_info *lhs = (struct qcom_irq_info *)a;
struct qcom_irq_info *rhs = (struct qcom_irq_info *)b;
if (lhs->total_count < rhs->total_count)
return 1;
if (lhs->total_count > rhs->total_count)
return COMPARE_RET;
return 0;
}
static void swap_irq_info_fn(void *a, void *b, int size)
{
struct qcom_irq_info temp;
struct qcom_irq_info *lhs = (struct qcom_irq_info *)a;
struct qcom_irq_info *rhs = (struct qcom_irq_info *)b;
temp = *lhs;
*lhs = *rhs;
*rhs = temp;
}
static struct qcom_irq_info *search(struct qcom_irq_info *key,
struct qcom_irq_info *base,
size_t num, compare_t cmp)
{
struct qcom_irq_info *pivot = NULL;
int result;
while (num > 0) {
pivot = base + (num >> 1);
result = cmp(key, pivot);
if (result == 0)
goto out;
if (result > 0) {
base = pivot + 1;
num--;
}
if (num)
num >>= 1;
}
out:
if (pivot)
pr_debug("*pivot:%u key:%u\n",
pivot->total_count, key->total_count);
return pivot;
}
static void print_irq_stat(struct msm_watchdog_data *wdog_dd)
{
int index;
int cpu, ipi_nr;
struct qcom_irq_info *info;
pr_info("(virq:irq_count)- ");
for (index = 0; index < NR_TOP_HITTERS; index++) {
info = &wdog_dd->irq_counts[index];
pr_cont("%u:%u ", info->irq, info->total_count);
}
pr_cont("\n");
pr_info("(cpu:irq_count)- ");
for_each_possible_cpu(cpu)
pr_cont("%u:%u ", cpu, wdog_dd->tot_irq_count[cpu]);
pr_cont("\n");
pr_info("(ipi:irq_count)- ");
ipi_nr = nr_ipi_get();
for (index = 0; index < ipi_nr; index++) {
info = &wdog_dd->ipi_counts[index];
pr_cont("%u:%u ", info->irq, info->total_count);
}
pr_cont("\n");
}
static void compute_irq_stat(struct work_struct *work)
void record_irq_count(void)
{
int irq, ipi_nr;
unsigned int count;
int index = 0, cpu, irq, ipi_nr;
struct irq_desc *desc, **desc_ipi_arr;
struct qcom_irq_info *pos;
struct qcom_irq_info *start;
struct qcom_irq_info key = {0};
unsigned int running;
struct msm_watchdog_data *wdog_dd = container_of(work,
struct msm_watchdog_data,
irq_counts_work);
struct irq_desc *desc;
size_t arr_size = ARRAY_SIZE(wdog_dd->irq_counts);
ipi_nr = nr_ipi_get();
/* avoid parallel execution from bark handler and queued
* irq_counts_work.
*/
running = atomic_xchg(&wdog_dd->irq_counts_running, 1);
if (running)
return;
/* per irq counts */
rcu_read_lock();
for_each_irq_nr(irq) {
if (irq <= ipi_nr)
continue;
desc = irq_to_desc(irq);
if (!desc)
continue;
count = kstat_irqs_usr(irq);
if (!count)
desc->android_vendor_data1 = count;
}
rcu_read_unlock();
}
struct irq_info {
int irq;
unsigned int count;
struct irq_desc *desc;
};
void compute_irq_count(void)
{
int i, irq, ipi_nr;
unsigned int count, diff;
struct irq_desc *desc;
unsigned long time_diff;
struct irq_info irq_info_list[NR_TOP_HITTERS];
unsigned int list_min_cnt = 0, list_min_pos = 0;
ipi_nr = nr_ipi_get();
time_diff = (sched_clock() - wdog_data->last_pet) / 1000000;
rcu_read_lock();
for_each_irq_nr(irq) {
if (irq <= ipi_nr)
continue;
if (index < arr_size) {
wdog_dd->irq_counts[index].irq = irq;
wdog_dd->irq_counts[index].total_count = count;
for_each_possible_cpu(cpu)
wdog_dd->irq_counts[index].irq_counter[cpu] =
*per_cpu_ptr(desc->kstat_irqs, cpu);
index++;
if (index == arr_size)
sort(wdog_dd->irq_counts, arr_size,
sizeof(*pos), cmp_irq_info_fn,
swap_irq_info_fn);
desc = irq_to_desc(irq);
if (!desc)
continue;
}
key.total_count = count;
start = wdog_dd->irq_counts + (arr_size - 1);
pos = search(&key, wdog_dd->irq_counts,
arr_size, cmp_irq_info_fn);
count = kstat_irqs_usr(irq);
diff = count - desc->android_vendor_data1;
if (pos && (pos->total_count >= key.total_count)) {
if (pos < start)
pos++;
else
pos = NULL;
}
if (diff > list_min_cnt) {
irq_info_list[list_min_pos].count = diff;
irq_info_list[list_min_pos].irq = irq;
irq_info_list[list_min_pos].desc = desc;
pr_debug("count :%u irq:%u\n", count, irq);
if (pos && pos < start) {
start--;
for (; start >= pos ; start--)
*(start + 1) = *start;
}
if (pos) {
pos->irq = irq;
pos->total_count = count;
for_each_possible_cpu(cpu)
pos->irq_counter[cpu] =
*per_cpu_ptr(desc->kstat_irqs, cpu);
list_min_pos = 0;
list_min_cnt = irq_info_list[0].count;
for (i = 1; i < NR_TOP_HITTERS; i++) {
if (irq_info_list[i].count < list_min_cnt) {
list_min_pos = i;
list_min_cnt = irq_info_list[i].count;
}
}
}
}
rcu_read_unlock();
/* per cpu total irq counts */
for_each_possible_cpu(cpu)
wdog_dd->tot_irq_count[cpu] = kstat_cpu_irqs_sum(cpu);
/* per IPI counts */
ipi_nr = nr_ipi_get();
desc_ipi_arr = ipi_desc_get();
for (index = 0; index < ipi_nr; index++) {
wdog_dd->ipi_counts[index].total_count = 0;
wdog_dd->ipi_counts[index].irq = index;
irq = irq_desc_get_irq(desc_ipi_arr[index]);
for_each_possible_cpu(cpu) {
wdog_dd->ipi_counts[index].irq_counter[cpu] =
kstat_irqs_cpu(irq, cpu);
wdog_dd->ipi_counts[index].total_count +=
wdog_dd->ipi_counts[index].irq_counter[cpu];
pr_emerg("Top irqs in last %lu ms:\n", time_diff);
for (i = 0; i < NR_TOP_HITTERS; i++) {
count = irq_info_list[i].count;
if (count != 0) {
irq = irq_info_list[i].irq;
desc = irq_info_list[i].desc;
pr_emerg("IRQ %d [%s] - %d times\n",
irq, desc->action->name, count);
}
}
print_irq_stat(wdog_dd);
atomic_xchg(&wdog_dd->irq_counts_running, 0);
}
static void queue_irq_counts_work(struct work_struct *irq_counts_work)
{
queue_work(system_unbound_wq, irq_counts_work);
}
#else
static void queue_irq_counts_work(struct work_struct *irq_counts_work) { }
static void compute_irq_stat(struct work_struct *work) { }
#endif
static int qcom_wdt_hibernation_notifier(struct notifier_block *nb,
unsigned long event, void *dummy)
{
@ -698,7 +584,7 @@ static __ref int qcom_wdt_kthread(void *arg)
spin_unlock(&wdog_dd->freeze_lock);
}
queue_irq_counts_work(&wdog_dd->irq_counts_work);
record_irq_count();
}
return 0;
}
@ -757,7 +643,6 @@ int qcom_wdt_remove(struct platform_device *pdev)
wdog_dd->timer_expired = true;
wdog_dd->user_pet_complete = true;
kthread_stop(wdog_dd->watchdog_task);
flush_work(&wdog_dd->irq_counts_work);
return 0;
}
EXPORT_SYMBOL(qcom_wdt_remove);
@ -771,7 +656,7 @@ void qcom_wdt_trigger_bite(void)
{
if (!wdog_data)
return;
compute_irq_stat(&wdog_data->irq_counts_work);
compute_irq_count();
dev_err(wdog_data->dev, "Causing a QCOM Apps Watchdog bite!\n");
wdog_data->ops->show_wdt_status(wdog_data);
wdog_data->ops->set_bite_time(1, wdog_data);
@ -881,8 +766,6 @@ static int qcom_wdt_init(struct msm_watchdog_data *wdog_dd,
if (ret)
return ret;
INIT_WORK(&wdog_dd->irq_counts_work, compute_irq_stat);
atomic_set(&wdog_dd->irq_counts_running, 0);
delay_time = msecs_to_jiffies(wdog_dd->pet_time);
wdog_dd->ops->set_bark_time(wdog_dd->bark_time, wdog_dd);
wdog_dd->ops->set_bite_time(wdog_dd->bark_time + 3 * 1000, wdog_dd);
@ -924,7 +807,6 @@ static int qcom_wdt_init(struct msm_watchdog_data *wdog_dd,
}
del_timer_sync(&wdog_dd->pet_timer);
flush_work(&wdog_dd->irq_counts_work);
dev_err(wdog_dd->dev, "Failed Initializing QCOM Apps Watchdog\n");
return ret;
}

View file

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2019-2020 The Linux Foundation. All rights reserved.
* Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved.
* Copyright (c) 2021-2023, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#ifndef _SOC_QCOM_WATCHDOG_H_
@ -34,7 +34,6 @@
#if IS_ENABLED(CONFIG_QCOM_WDT_CORE)
#include <linux/platform_device.h>
#include <asm/hardirq.h>
/**
* The enable constant that can be used between the core framework and the
@ -67,24 +66,11 @@
#define WDOG_NR_IPI 10
#define NR_TOP_HITTERS 10
#define NR_TOP_HITTERS 5
struct qcom_wdt_ops;
struct msm_watchdog_data;
/** qcom_irq_info - IRQ stats
*
* @irq: linux/virtual irq numer.
* @total_count: sum of irq occurrence count on all cpu's.
* @irq_counter: irq occurrence count on each cpu.
*/
struct qcom_irq_info {
unsigned int irq;
unsigned int total_count;
unsigned int irq_counter[NR_CPUS];
};
/** qcom_wdt_ops - The msm-watchdog-devices operations
*
* @set_bark_time: The routine for setting the watchdog bark time.
@ -147,11 +133,6 @@ struct msm_watchdog_data {
int cpu_idle_pc_state[NR_CPUS];
bool freeze_in_progress;
spinlock_t freeze_lock;
struct work_struct irq_counts_work;
struct qcom_irq_info irq_counts[NR_TOP_HITTERS];
struct qcom_irq_info ipi_counts[WDOG_NR_IPI];
unsigned int tot_irq_count[NR_CPUS];
atomic_t irq_counts_running;
struct timer_list user_pet_timer;
bool hibernate;
};