soc: qcom: Print irq count only when panic happens
Printing the top 10 irq counts in the watchdog pet handler doesn't help much in most irq storm cases. So use a new method to record and compute the irq count delta, then show the top 5 irqs on watchdog bark or panic to help solve irq storm problems. Change-Id: I7bae2bf1bf4fc15c1874eebc8661b9d8c4600229 Signed-off-by: Huang Yiwei <quic_hyiwei@quicinc.com>
This commit is contained in:
parent
5bdbd9edab
commit
b1807b46e0
4 changed files with 62 additions and 209 deletions
|
|
@ -130,7 +130,6 @@ CONFIG_QCOM_IOMMU_DEBUG=m
|
|||
CONFIG_QCOM_IOMMU_UTIL=m
|
||||
# CONFIG_QCOM_IPA is not set
|
||||
CONFIG_QCOM_IPCC=m
|
||||
CONFIG_QCOM_IRQ_STAT=y
|
||||
CONFIG_QCOM_LAZY_MAPPING=m
|
||||
CONFIG_QCOM_LLCC=m
|
||||
CONFIG_QCOM_LLCC_PERFMON=m
|
||||
|
|
|
|||
|
|
@ -546,15 +546,6 @@ config QCOM_SOC_WATCHDOG
|
|||
utilize this the Qualcomm Technologies, Inc. watchdog framework must
|
||||
also be enabled.
|
||||
|
||||
config QCOM_IRQ_STAT
|
||||
bool "QCOM IRQ stats"
|
||||
depends on QCOM_WDT_CORE
|
||||
help
|
||||
This give irq stats for top hitter at
|
||||
watchdog pet, watchdog bark and kernel panics.
|
||||
This provides additional debug information
|
||||
for irq counts on cpu and ipi counts.
|
||||
|
||||
config QCOM_FORCE_WDOG_BITE_ON_PANIC
|
||||
bool "QCOM force watchdog bite on panic"
|
||||
depends on QCOM_WDT_CORE
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2012-2021, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
* Copyright (c) 2021-2023, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
#include <linux/irqdomain.h>
|
||||
#include <linux/delay.h>
|
||||
|
|
@ -36,9 +36,7 @@
|
|||
#include <linux/notifier.h>
|
||||
|
||||
#define MASK_SIZE 32
|
||||
#define COMPARE_RET -1
|
||||
|
||||
typedef int (*compare_t) (const void *lhs, const void *rhs);
|
||||
static struct msm_watchdog_data *wdog_data;
|
||||
|
||||
static void qcom_wdt_dump_cpu_alive_mask(struct msm_watchdog_data *wdog_dd)
|
||||
|
|
@ -51,200 +49,88 @@ static void qcom_wdt_dump_cpu_alive_mask(struct msm_watchdog_data *wdog_dd)
|
|||
alive_mask_buf);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_QCOM_IRQ_STAT
|
||||
static int cmp_irq_info_fn(const void *a, const void *b)
|
||||
{
|
||||
struct qcom_irq_info *lhs = (struct qcom_irq_info *)a;
|
||||
struct qcom_irq_info *rhs = (struct qcom_irq_info *)b;
|
||||
|
||||
if (lhs->total_count < rhs->total_count)
|
||||
return 1;
|
||||
|
||||
if (lhs->total_count > rhs->total_count)
|
||||
return COMPARE_RET;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void swap_irq_info_fn(void *a, void *b, int size)
|
||||
{
|
||||
struct qcom_irq_info temp;
|
||||
struct qcom_irq_info *lhs = (struct qcom_irq_info *)a;
|
||||
struct qcom_irq_info *rhs = (struct qcom_irq_info *)b;
|
||||
|
||||
temp = *lhs;
|
||||
*lhs = *rhs;
|
||||
*rhs = temp;
|
||||
}
|
||||
|
||||
static struct qcom_irq_info *search(struct qcom_irq_info *key,
|
||||
struct qcom_irq_info *base,
|
||||
size_t num, compare_t cmp)
|
||||
{
|
||||
struct qcom_irq_info *pivot = NULL;
|
||||
int result;
|
||||
|
||||
while (num > 0) {
|
||||
pivot = base + (num >> 1);
|
||||
result = cmp(key, pivot);
|
||||
|
||||
if (result == 0)
|
||||
goto out;
|
||||
|
||||
if (result > 0) {
|
||||
base = pivot + 1;
|
||||
num--;
|
||||
}
|
||||
|
||||
if (num)
|
||||
num >>= 1;
|
||||
}
|
||||
|
||||
out:
|
||||
if (pivot)
|
||||
pr_debug("*pivot:%u key:%u\n",
|
||||
pivot->total_count, key->total_count);
|
||||
|
||||
return pivot;
|
||||
}
|
||||
|
||||
static void print_irq_stat(struct msm_watchdog_data *wdog_dd)
|
||||
{
|
||||
int index;
|
||||
int cpu, ipi_nr;
|
||||
struct qcom_irq_info *info;
|
||||
|
||||
pr_info("(virq:irq_count)- ");
|
||||
for (index = 0; index < NR_TOP_HITTERS; index++) {
|
||||
info = &wdog_dd->irq_counts[index];
|
||||
pr_cont("%u:%u ", info->irq, info->total_count);
|
||||
}
|
||||
pr_cont("\n");
|
||||
|
||||
pr_info("(cpu:irq_count)- ");
|
||||
for_each_possible_cpu(cpu)
|
||||
pr_cont("%u:%u ", cpu, wdog_dd->tot_irq_count[cpu]);
|
||||
pr_cont("\n");
|
||||
|
||||
pr_info("(ipi:irq_count)- ");
|
||||
ipi_nr = nr_ipi_get();
|
||||
for (index = 0; index < ipi_nr; index++) {
|
||||
info = &wdog_dd->ipi_counts[index];
|
||||
pr_cont("%u:%u ", info->irq, info->total_count);
|
||||
}
|
||||
pr_cont("\n");
|
||||
}
|
||||
|
||||
static void compute_irq_stat(struct work_struct *work)
|
||||
void record_irq_count(void)
|
||||
{
|
||||
int irq, ipi_nr;
|
||||
unsigned int count;
|
||||
int index = 0, cpu, irq, ipi_nr;
|
||||
struct irq_desc *desc, **desc_ipi_arr;
|
||||
struct qcom_irq_info *pos;
|
||||
struct qcom_irq_info *start;
|
||||
struct qcom_irq_info key = {0};
|
||||
unsigned int running;
|
||||
struct msm_watchdog_data *wdog_dd = container_of(work,
|
||||
struct msm_watchdog_data,
|
||||
irq_counts_work);
|
||||
struct irq_desc *desc;
|
||||
|
||||
size_t arr_size = ARRAY_SIZE(wdog_dd->irq_counts);
|
||||
ipi_nr = nr_ipi_get();
|
||||
|
||||
/* avoid parallel execution from bark handler and queued
|
||||
* irq_counts_work.
|
||||
*/
|
||||
running = atomic_xchg(&wdog_dd->irq_counts_running, 1);
|
||||
if (running)
|
||||
return;
|
||||
|
||||
/* per irq counts */
|
||||
rcu_read_lock();
|
||||
for_each_irq_nr(irq) {
|
||||
if (irq <= ipi_nr)
|
||||
continue;
|
||||
|
||||
desc = irq_to_desc(irq);
|
||||
if (!desc)
|
||||
continue;
|
||||
|
||||
count = kstat_irqs_usr(irq);
|
||||
if (!count)
|
||||
desc->android_vendor_data1 = count;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
struct irq_info {
|
||||
int irq;
|
||||
unsigned int count;
|
||||
struct irq_desc *desc;
|
||||
};
|
||||
|
||||
void compute_irq_count(void)
|
||||
{
|
||||
int i, irq, ipi_nr;
|
||||
unsigned int count, diff;
|
||||
struct irq_desc *desc;
|
||||
unsigned long time_diff;
|
||||
struct irq_info irq_info_list[NR_TOP_HITTERS];
|
||||
unsigned int list_min_cnt = 0, list_min_pos = 0;
|
||||
|
||||
ipi_nr = nr_ipi_get();
|
||||
time_diff = (sched_clock() - wdog_data->last_pet) / 1000000;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_irq_nr(irq) {
|
||||
if (irq <= ipi_nr)
|
||||
continue;
|
||||
|
||||
if (index < arr_size) {
|
||||
wdog_dd->irq_counts[index].irq = irq;
|
||||
wdog_dd->irq_counts[index].total_count = count;
|
||||
for_each_possible_cpu(cpu)
|
||||
wdog_dd->irq_counts[index].irq_counter[cpu] =
|
||||
*per_cpu_ptr(desc->kstat_irqs, cpu);
|
||||
|
||||
index++;
|
||||
if (index == arr_size)
|
||||
sort(wdog_dd->irq_counts, arr_size,
|
||||
sizeof(*pos), cmp_irq_info_fn,
|
||||
swap_irq_info_fn);
|
||||
|
||||
desc = irq_to_desc(irq);
|
||||
if (!desc)
|
||||
continue;
|
||||
}
|
||||
|
||||
key.total_count = count;
|
||||
start = wdog_dd->irq_counts + (arr_size - 1);
|
||||
pos = search(&key, wdog_dd->irq_counts,
|
||||
arr_size, cmp_irq_info_fn);
|
||||
count = kstat_irqs_usr(irq);
|
||||
diff = count - desc->android_vendor_data1;
|
||||
|
||||
if (pos && (pos->total_count >= key.total_count)) {
|
||||
if (pos < start)
|
||||
pos++;
|
||||
else
|
||||
pos = NULL;
|
||||
}
|
||||
if (diff > list_min_cnt) {
|
||||
irq_info_list[list_min_pos].count = diff;
|
||||
irq_info_list[list_min_pos].irq = irq;
|
||||
irq_info_list[list_min_pos].desc = desc;
|
||||
|
||||
pr_debug("count :%u irq:%u\n", count, irq);
|
||||
if (pos && pos < start) {
|
||||
start--;
|
||||
for (; start >= pos ; start--)
|
||||
*(start + 1) = *start;
|
||||
}
|
||||
|
||||
if (pos) {
|
||||
pos->irq = irq;
|
||||
pos->total_count = count;
|
||||
for_each_possible_cpu(cpu)
|
||||
pos->irq_counter[cpu] =
|
||||
*per_cpu_ptr(desc->kstat_irqs, cpu);
|
||||
list_min_pos = 0;
|
||||
list_min_cnt = irq_info_list[0].count;
|
||||
for (i = 1; i < NR_TOP_HITTERS; i++) {
|
||||
if (irq_info_list[i].count < list_min_cnt) {
|
||||
list_min_pos = i;
|
||||
list_min_cnt = irq_info_list[i].count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/* per cpu total irq counts */
|
||||
for_each_possible_cpu(cpu)
|
||||
wdog_dd->tot_irq_count[cpu] = kstat_cpu_irqs_sum(cpu);
|
||||
|
||||
/* per IPI counts */
|
||||
ipi_nr = nr_ipi_get();
|
||||
desc_ipi_arr = ipi_desc_get();
|
||||
for (index = 0; index < ipi_nr; index++) {
|
||||
wdog_dd->ipi_counts[index].total_count = 0;
|
||||
wdog_dd->ipi_counts[index].irq = index;
|
||||
irq = irq_desc_get_irq(desc_ipi_arr[index]);
|
||||
for_each_possible_cpu(cpu) {
|
||||
wdog_dd->ipi_counts[index].irq_counter[cpu] =
|
||||
kstat_irqs_cpu(irq, cpu);
|
||||
wdog_dd->ipi_counts[index].total_count +=
|
||||
wdog_dd->ipi_counts[index].irq_counter[cpu];
|
||||
pr_emerg("Top irqs in last %lu ms:\n", time_diff);
|
||||
for (i = 0; i < NR_TOP_HITTERS; i++) {
|
||||
count = irq_info_list[i].count;
|
||||
if (count != 0) {
|
||||
irq = irq_info_list[i].irq;
|
||||
desc = irq_info_list[i].desc;
|
||||
pr_emerg("IRQ %d [%s] - %d times\n",
|
||||
irq, desc->action->name, count);
|
||||
}
|
||||
}
|
||||
|
||||
print_irq_stat(wdog_dd);
|
||||
atomic_xchg(&wdog_dd->irq_counts_running, 0);
|
||||
}
|
||||
|
||||
static void queue_irq_counts_work(struct work_struct *irq_counts_work)
|
||||
{
|
||||
queue_work(system_unbound_wq, irq_counts_work);
|
||||
}
|
||||
#else
|
||||
static void queue_irq_counts_work(struct work_struct *irq_counts_work) { }
|
||||
static void compute_irq_stat(struct work_struct *work) { }
|
||||
#endif
|
||||
|
||||
static int qcom_wdt_hibernation_notifier(struct notifier_block *nb,
|
||||
unsigned long event, void *dummy)
|
||||
{
|
||||
|
|
@ -698,7 +584,7 @@ static __ref int qcom_wdt_kthread(void *arg)
|
|||
spin_unlock(&wdog_dd->freeze_lock);
|
||||
}
|
||||
|
||||
queue_irq_counts_work(&wdog_dd->irq_counts_work);
|
||||
record_irq_count();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -757,7 +643,6 @@ int qcom_wdt_remove(struct platform_device *pdev)
|
|||
wdog_dd->timer_expired = true;
|
||||
wdog_dd->user_pet_complete = true;
|
||||
kthread_stop(wdog_dd->watchdog_task);
|
||||
flush_work(&wdog_dd->irq_counts_work);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(qcom_wdt_remove);
|
||||
|
|
@ -771,7 +656,7 @@ void qcom_wdt_trigger_bite(void)
|
|||
{
|
||||
if (!wdog_data)
|
||||
return;
|
||||
compute_irq_stat(&wdog_data->irq_counts_work);
|
||||
compute_irq_count();
|
||||
dev_err(wdog_data->dev, "Causing a QCOM Apps Watchdog bite!\n");
|
||||
wdog_data->ops->show_wdt_status(wdog_data);
|
||||
wdog_data->ops->set_bite_time(1, wdog_data);
|
||||
|
|
@ -881,8 +766,6 @@ static int qcom_wdt_init(struct msm_watchdog_data *wdog_dd,
|
|||
if (ret)
|
||||
return ret;
|
||||
|
||||
INIT_WORK(&wdog_dd->irq_counts_work, compute_irq_stat);
|
||||
atomic_set(&wdog_dd->irq_counts_running, 0);
|
||||
delay_time = msecs_to_jiffies(wdog_dd->pet_time);
|
||||
wdog_dd->ops->set_bark_time(wdog_dd->bark_time, wdog_dd);
|
||||
wdog_dd->ops->set_bite_time(wdog_dd->bark_time + 3 * 1000, wdog_dd);
|
||||
|
|
@ -924,7 +807,6 @@ static int qcom_wdt_init(struct msm_watchdog_data *wdog_dd,
|
|||
}
|
||||
|
||||
del_timer_sync(&wdog_dd->pet_timer);
|
||||
flush_work(&wdog_dd->irq_counts_work);
|
||||
dev_err(wdog_dd->dev, "Failed Initializing QCOM Apps Watchdog\n");
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (c) 2019-2020 The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2021-2022, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
* Copyright (c) 2021-2023, Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SOC_QCOM_WATCHDOG_H_
|
||||
|
|
@ -34,7 +34,6 @@
|
|||
|
||||
#if IS_ENABLED(CONFIG_QCOM_WDT_CORE)
|
||||
#include <linux/platform_device.h>
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
/**
|
||||
* The enable constant that can be used between the core framework and the
|
||||
|
|
@ -67,24 +66,11 @@
|
|||
|
||||
|
||||
#define WDOG_NR_IPI 10
|
||||
#define NR_TOP_HITTERS 10
|
||||
#define NR_TOP_HITTERS 5
|
||||
|
||||
struct qcom_wdt_ops;
|
||||
struct msm_watchdog_data;
|
||||
|
||||
|
||||
/** qcom_irq_info - IRQ stats
|
||||
*
|
||||
* @irq: linux/virtual irq numer.
|
||||
* @total_count: sum of irq occurrence count on all cpu's.
|
||||
* @irq_counter: irq occurrence count on each cpu.
|
||||
*/
|
||||
struct qcom_irq_info {
|
||||
unsigned int irq;
|
||||
unsigned int total_count;
|
||||
unsigned int irq_counter[NR_CPUS];
|
||||
};
|
||||
|
||||
/** qcom_wdt_ops - The msm-watchdog-devices operations
|
||||
*
|
||||
* @set_bark_time: The routine for setting the watchdog bark time.
|
||||
|
|
@ -147,11 +133,6 @@ struct msm_watchdog_data {
|
|||
int cpu_idle_pc_state[NR_CPUS];
|
||||
bool freeze_in_progress;
|
||||
spinlock_t freeze_lock;
|
||||
struct work_struct irq_counts_work;
|
||||
struct qcom_irq_info irq_counts[NR_TOP_HITTERS];
|
||||
struct qcom_irq_info ipi_counts[WDOG_NR_IPI];
|
||||
unsigned int tot_irq_count[NR_CPUS];
|
||||
atomic_t irq_counts_running;
|
||||
struct timer_list user_pet_timer;
|
||||
bool hibernate;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue