From 763b92568726b55569ea4896fe36e3c174cc3b0d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 11 Dec 2022 19:50:20 +0100 Subject: [PATCH 01/17] parisc: Align parisc MADV_XXX constants with all other architectures commit 71bdea6f798b425bc0003780b13e3fdecb16a010 upstream. Adjust some MADV_XXX constants to be in sync with what their values are on all other platforms. There is currently no reason to have its own numbering on parisc, but it requires workarounds in many userspace sources (e.g. glibc, qemu, ...) - which are often forgotten and thus introduce bugs and different behaviour on parisc. A wrapper avoids an ABI breakage for existing userspace applications by translating any old values to the new ones, so this change allows us to move over all programs to the new ABI over time. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/uapi/asm/mman.h | 29 +++++++++++------------ arch/parisc/kernel/sys_parisc.c | 28 ++++++++++++++++++++++ arch/parisc/kernel/syscalls/syscall.tbl | 2 +- tools/arch/parisc/include/uapi/asm/mman.h | 12 +++++----- tools/perf/bench/bench.h | 12 ---------- 5 files changed, 49 insertions(+), 34 deletions(-) diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 22133a6a506e..68c44f99bc93 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -49,6 +49,19 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on
fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ @@ -57,27 +70,13 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ -#define MADV_MERGEABLE 65 /* KSM may merge identical pages */ -#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ - -#define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ -#define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ - -#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, - overrides the coredump filter bits */ -#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ - -#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */ -#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */ - -#define MADV_COLLAPSE 73 /* Synchronous hugepage collapse */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ #define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ /* compatibility flags */ #define MAP_FILE 0 -#define MAP_VARIABLE 0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 848b0702005d..09a34b07f02e 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -465,3 +465,31 @@ asmlinkage long parisc_inotify_init1(int flags) flags = FIX_O_NONBLOCK(flags); return sys_inotify_init1(flags); } + +/* + * madvise() wrapper + * + * Up to kernel v6.1 parisc has different values than all other + * platforms for the MADV_xxx flags listed below. + * To keep binary compatibility with existing userspace programs + * translate the former values to the new values. 
+ * + * XXX: Remove this wrapper in year 2025 (or later) + */ + +asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior) +{ + switch (behavior) { + case 65: behavior = MADV_MERGEABLE; break; + case 66: behavior = MADV_UNMERGEABLE; break; + case 67: behavior = MADV_HUGEPAGE; break; + case 68: behavior = MADV_NOHUGEPAGE; break; + case 69: behavior = MADV_DONTDUMP; break; + case 70: behavior = MADV_DODUMP; break; + case 71: behavior = MADV_WIPEONFORK; break; + case 72: behavior = MADV_KEEPONFORK; break; + case 73: behavior = MADV_COLLAPSE; break; + } + + return sys_madvise(start, len_in, behavior); +} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 8a99c998da9b..0e42fceb2d5e 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -131,7 +131,7 @@ 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common shutdown sys_shutdown 118 common fsync sys_fsync -119 common madvise sys_madvise +119 common madvise parisc_madvise 120 common clone sys_clone_wrapper 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index 506c06a6536f..4cc88a642e10 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -1,20 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H -#define MADV_DODUMP 70 +#define MADV_DODUMP 17 #define MADV_DOFORK 11 -#define MADV_DONTDUMP 69 +#define MADV_DONTDUMP 16 #define MADV_DONTFORK 10 #define MADV_DONTNEED 4 #define MADV_FREE 8 -#define MADV_HUGEPAGE 67 -#define MADV_MERGEABLE 65 -#define MADV_NOHUGEPAGE 68 +#define MADV_HUGEPAGE 14 +#define MADV_MERGEABLE 12 +#define MADV_NOHUGEPAGE 15 #define MADV_NORMAL 0 #define MADV_RANDOM 1 
#define MADV_REMOVE 9 #define MADV_SEQUENTIAL 2 -#define MADV_UNMERGEABLE 66 +#define MADV_UNMERGEABLE 13 #define MADV_WILLNEED 3 #define MAP_ANONYMOUS 0x10 #define MAP_DENYWRITE 0x0800 diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 6cefb4315d75..a5d49b3b6a09 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime; * The madvise transparent hugepage constants were added in glibc * 2.13. For compatibility with older versions of glibc, define these * tokens if they are not already defined. - * - * PA-RISC uses different madvise values from other architectures and - * needs to be special-cased. */ -#ifdef __hppa__ -# ifndef MADV_HUGEPAGE -# define MADV_HUGEPAGE 67 -# endif -# ifndef MADV_NOHUGEPAGE -# define MADV_NOHUGEPAGE 68 -# endif -#else # ifndef MADV_HUGEPAGE # define MADV_HUGEPAGE 14 # endif # ifndef MADV_NOHUGEPAGE # define MADV_NOHUGEPAGE 15 # endif -#endif int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); From f28418b06f6b1e83c81369ef1705ee53da106ae6 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:27 -0800 Subject: [PATCH 02/17] x86/fpu: Take task_struct* in copy_sigframe_from_user_to_xstate() commit 6a877d2450ace4f27c012519e5a1ae818f931983 upstream. This will allow copy_sigframe_from_user_to_xstate() to grab the address of thread_struct's pkru value in a later patch. 
Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-2-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 4 ++-- arch/x86/kernel/fpu/xstate.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 91d4b6de58ab..558076dbde5b 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { - if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx)) + if (copy_sigframe_from_user_to_xstate(tsk, buf_fx)) return false; } else { if (__copy_from_user(&fpregs->fxsave, buf_fx, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c2dde46a538e..c88c60bac3ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1278,10 +1278,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf) * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. 
*/ -int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, +int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(fpstate, NULL, ubuf); + return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf); } static bool validate_independent_components(u64 mask) diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 5ad47031383b..f08ee2722e74 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -47,7 +47,7 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf); -extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf); +extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf); extern void fpu__init_cpu_xstate(void); From a442736b704d4194ae68f75f4ee2e64cf2b8a142 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:28 -0800 Subject: [PATCH 03/17] x86/fpu: Add a pkru argument to copy_uabi_from_kernel_to_xstate(). commit 1c813ce0305571e1b2e4cc4acca451da9e6ad18f upstream. Both KVM (through KVM_SET_XSTATE) and ptrace (through PTRACE_SETREGSET with NT_X86_XSTATE) ultimately call copy_uabi_from_kernel_to_xstate(), but the canonical locations for the current PKRU value for KVM guests and processes in a ptrace stop are different (in the kvm_vcpu_arch and the thread_struct structs respectively). In preparation for eventually handling PKRU in copy_uabi_to_xstate, pass in a pointer to the PKRU location.
Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-3-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 2 +- arch/x86/kernel/fpu/xstate.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d00db56a8868..db3300a617b7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -406,7 +406,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; - ret = copy_uabi_from_kernel_to_xstate(kstate, ustate); + ret = copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); if (ret) return ret; diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 75ffaef8c299..6d056b68f4ed 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } fpu_force_restore(fpu); - ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru); out: vfree(tmpbuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c88c60bac3ae..d6f414af00cb 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1268,7 +1268,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] * format and copy to the target thread. Used by ptrace and KVM. 
*/ -int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf) +int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { return copy_uabi_to_xstate(fpstate, kbuf, NULL); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index f08ee2722e74..a4ecb04d8d64 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -46,7 +46,7 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u32 pkru_val, enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); -extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf); +extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf); From bfa72faf69e82f7d64fefe0ca162f803d98e6679 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:29 -0800 Subject: [PATCH 04/17] x86/fpu: Add a pkru argument to copy_uabi_to_xstate() commit 2c87767c35ee9744f666ccec869d5fe742c3de0a upstream. In preparation for moving PKRU handling code out of fpu_copy_uabi_to_guest_fpstate() and into copy_uabi_to_xstate(), add an argument that copy_uabi_from_kernel_to_xstate() can use to pass the canonical location of the PKRU value. For copy_sigframe_from_user_to_xstate() the kernel will actually restore the PKRU value from the fpstate, but pass in the thread_struct's pkru location anyways for consistency. 
Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-4-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/xstate.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d6f414af00cb..d657c8b1fb08 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1200,8 +1200,18 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, } +/** + * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate + * @fpstate: The fpstate buffer to copy to + * @kbuf: The UABI format buffer, if it comes from the kernel + * @ubuf: The UABI format buffer, if it comes from userspace + * @pkru: unused + * + * Converts from the UABI format into the kernel internal hardware + * dependent format. + */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, - const void __user *ubuf) + const void __user *ubuf, u32 *pkru) { struct xregs_state *xsave = &fpstate->regs.xsave; unsigned int offset, size; @@ -1270,7 +1280,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, */ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { - return copy_uabi_to_xstate(fpstate, kbuf, NULL); + return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru); } /* @@ -1281,7 +1291,7 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf); + return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru); } static bool validate_independent_components(u64 mask) From 587478470b0a632c706090a8b0cbf59de0e98b48 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:30 -0800 Subject: [PATCH 05/17] x86/fpu: 
Allow PKRU to be (once again) written by ptrace. commit 4a804c4f8356393d6b5eff7600f07615d7869c13 upstream. Move KVM's PKRU handling code in fpu_copy_uabi_to_guest_fpstate() to copy_uabi_to_xstate() so that it is shared with other APIs that write the XSTATE such as PTRACE_SETREGSET with NT_X86_XSTATE. This restores the pre-5.14 behavior of ptrace. The regression can be seen by running gdb and executing `p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the write to the PKRU register (which gdb performs through ptrace) is ignored. [ dhansen: removed stable@ tag for now. The ABI was broken for long enough that this is not urgent material. Let's let it stew in tip for a few weeks before it's submitted to stable because there are so many ABIs potentially affected. ] Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()") Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-5-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/core.c | 13 +------------ arch/x86/kernel/fpu/xstate.c | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index db3300a617b7..4bca73e065c6 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -391,8 +391,6 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, { struct fpstate *kstate = gfpu->fpstate; const union fpregs_state *ustate = buf; - struct pkru_state *xpkru; - int ret; if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) { if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE) @@ -406,16 +404,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; - ret = copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); - if (ret) - return ret; - - /* Retrieve PKRU if not in init state */ - if 
(kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) { - xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU); - *vpkru = xpkru->pkru; - } - return 0; + return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d657c8b1fb08..a8cf604d8a25 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1205,10 +1205,22 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, * @fpstate: The fpstate buffer to copy to * @kbuf: The UABI format buffer, if it comes from the kernel * @ubuf: The UABI format buffer, if it comes from userspace - * @pkru: unused + * @pkru: The location to write the PKRU value to * * Converts from the UABI format into the kernel internal hardware * dependent format. + * + * This function ultimately has three different callers with distinct PKRU + * behavior. + * 1. When called from sigreturn the PKRU register will be restored from + * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to + * @fpstate is sufficient to cover this case, but the caller will also + * pass a pointer to the thread_struct's pkru field in @pkru and updating + * it is harmless. + * 2. When called from ptrace the PKRU register will be restored from the + * thread_struct's pkru field. A pointer to that is passed in @pkru. + * 3. When called from KVM the PKRU register will be restored from the vcpu's + * pkru field. A pointer to that is passed in @pkru. 
*/ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf, u32 *pkru) @@ -1260,6 +1272,13 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, } } + if (hdr.xfeatures & XFEATURE_MASK_PKRU) { + struct pkru_state *xpkru; + + xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); + *pkru = xpkru->pkru; + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': From d2602da3da87a1832f708c8639b0afbfb5704cd4 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:31 -0800 Subject: [PATCH 06/17] x86/fpu: Emulate XRSTOR's behavior if the xfeatures PKRU bit is not set commit d7e5aceace514a2b1b3ca3dc44f93f1704766ca7 upstream. The hardware XRSTOR instruction resets the PKRU register to its hardware init value (namely 0) if the PKRU bit is not set in the xfeatures mask. Emulating that here restores the pre-5.14 behavior for PTRACE_SETREGSET with NT_X86_XSTATE, and makes sigreturn (which still uses XRSTOR) and ptrace behave identically. KVM has never used XRSTOR and never had this behavior, so KVM opts out of this emulation by passing a NULL pkru pointer to copy_uabi_to_xstate().
Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()") Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-6-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/core.c | 8 ++++++++ arch/x86/kernel/fpu/xstate.c | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4bca73e065c6..9baa89a8877d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -404,6 +404,14 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; + /* + * Nullify @vpkru to preserve its current value if PKRU's bit isn't set + * in the header. KVM's odd ABI is to leave PKRU untouched in this + * case (all other components are eventually re-initialized). + */ + if (!(ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU)) + vpkru = NULL; + return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a8cf604d8a25..714166cc25f2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1219,8 +1219,14 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, * it is harmless. * 2. When called from ptrace the PKRU register will be restored from the * thread_struct's pkru field. A pointer to that is passed in @pkru. + * The kernel will restore it manually, so the XRSTOR behavior that resets + * the PKRU register to the hardware init value (0) if the corresponding + * xfeatures bit is not set is emulated here. * 3. When called from KVM the PKRU register will be restored from the vcpu's - * pkru field. A pointer to that is passed in @pkru. + * pkru field. A pointer to that is passed in @pkru. 
KVM hasn't used + * XRSTOR and hasn't had the PKRU resetting behavior described above. To + * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures + * bit is not set. */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf, u32 *pkru) @@ -1277,6 +1283,13 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); *pkru = xpkru->pkru; + } else { + /* + * KVM may pass NULL here to indicate that it does not need + * PKRU updated. + */ + if (pkru) + *pkru = 0; } /* From 29fbaa434c223f29917e8de054cc48a96e6c0ed4 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:32 -0800 Subject: [PATCH 07/17] selftests/vm/pkeys: Add a regression test for setting PKRU through ptrace commit 6ea25770b043c7997ab21d1ce95ba5de4d3d85d9 upstream. This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and removing the PKRU bit from XSTATE_BV. Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-7-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/pkey-x86.h | 12 ++ tools/testing/selftests/vm/protection_keys.c | 131 ++++++++++++++++++- 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index b078ce9c6d2a..72c14cd3ddc7 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void) return 1; } +static inline int cpu_max_xsave_size(void) +{ + unsigned long XSTATE_CPUID = 0xd; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); + return ecx; +} + static inline u32 pkey_bit_position(int pkey) { return pkey * PKEY_BITS_PER_PKEY; diff --git a/tools/testing/selftests/vm/protection_keys.c 
b/tools/testing/selftests/vm/protection_keys.c index 291bc1e07842..95f403a0c46d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -18,12 +18,13 @@ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include +#include #include #include #include @@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); } +#if defined(__i386__) || defined(__x86_64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + u32 new_pkru; + pid_t child; + int status, ret; + int pkey_offset = pkey_reg_xstate_offset(); + size_t xsave_size = cpu_max_xsave_size(); + void *xsave; + u32 *pkey_register; + u64 *xstate_bv; + struct iovec iov; + + new_pkru = ~read_pkey_reg(); + /* Don't make PROT_EXEC mappings inaccessible */ + new_pkru &= ~3; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkru) + exit(1); + + /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ + raise(SIGSTOP); + + if (__read_pkey_reg() != 0) + exit(1); + + /* Stop and allow the tracer to examine PKRU */ + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == 
waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + xsave = (void *)malloc(xsave_size); + pkey_assert(xsave > 0); + + /* Modify the PKRU register directly */ + iov.iov_base = xsave; + iov.iov_len = xsave_size; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + pkey_register = (u32 *)(xsave + pkey_offset); + pkey_assert(*pkey_register == read_pkey_reg()); + + *pkey_register = new_pkru; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Clear the PKRU bit from XSTATE_BV */ + xstate_bv = (u64 *)(xsave + 512); + *xstate_bv &= ~(1 << 9); + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + 
pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value go to 0 */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); + free(xsave); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, +#if defined(__i386__) || defined(__x86_64__) + test_ptrace_modifies_pkru, +#endif }; void run_tests_once(void) From 8cc0e63ba86c9f4e732de9f42642231c8e3a31d2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Jan 2023 12:43:37 -0500 Subject: [PATCH 08/17] Revert "SUNRPC: Use RMW bitops in single-threaded hot paths" commit 7827c81f0248e3c2f40d438b020f3d222f002171 upstream. The premise that "Once an svc thread is scheduled and executing an RPC, no other processes will touch svc_rqst::rq_flags" is false. svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd threads when determining which thread to wake up next. Found via KCSAN. 
Fixes: 28df0988815f ("SUNRPC: Use RMW bitops in single-threaded hot paths") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 7 +++---- fs/nfsd/nfs4xdr.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svc.c | 6 +++--- net/sunrpc/svc_xprt.c | 2 +- net/sunrpc/svcsock.c | 8 ++++---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 7 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 34d1cd5883fb..c7329523a10f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -928,7 +928,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -2615,12 +2615,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2742,7 +2741,7 @@ encode_op: out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 78849646fe83..8377e14b8fba 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2464,7 +2464,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? 
RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 9a5db285d4ae..bdc34ea0d939 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g * rejecting the server-computed MIC in this somewhat rare case, * do not use splice with the GSS integrity service. */ - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) @@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs int pad, remaining_len, offset; u32 rseqno; - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 149171774bc6..24577d1b9907 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1244,10 +1244,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off by GSS integrity and privacy services */ - __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - __clear_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_DROPME, &rqstp->rq_flags); svc_putu32(resv, rqstp->rq_xid); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2106003645a7..c2ce12538008 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1238,7 +1238,7 @@ static struct 
cache_deferred_req *svc_defer(struct cache_req *req) trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2fc98fea59b4..e833103f4629 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) static void svc_sock_secure_port(struct svc_rqst *rqstp) { if (svc_port_is_privileged(svc_addr(rqstp))) - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); else - __clear_bit(RQ_SECURE, &rqstp->rq_flags); + clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ -1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) - __set_bit(RQ_LOCAL, &rqstp->rq_flags); + set_bit(RQ_LOCAL, &rqstp->rq_flags); else - __clear_bit(RQ_LOCAL, &rqstp->rq_flags); + clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 199fa012f18a..94b20fb47135 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) From cd2e80a3ac7005f964680ae63946cdde4e970e0a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Jan 2023 17:04:49 -0600 Subject: [PATCH 09/17] gcc: disable -Warray-bounds for gcc-11 too commit 5a41237ad1d4b62008f93163af1d9b1da90729d8 upstream. 
We had already disabled this warning for gcc-12 due to bugs in the value range analysis, but it turns out we end up having some similar problems with gcc-11.3 too, so let's disable it there too. Older gcc versions end up being increasingly less relevant, and hopefully clang and newer version of gcc (ie gcc-13) end up working reliably enough that we still get the build coverage even when we disable this for some versions. Link: https://lore.kernel.org/all/20221227002941.GA2691687@roeck-us.net/ Link: https://lore.kernel.org/all/D8BDBF66-E44C-45D4-9758-BAAA4F0C1998@kernel.org/ Cc: Kees Cook Cc: Vlastimil Babka Cc: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 94125d3b6893..0c214af99085 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -892,13 +892,17 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-12 array-bounds globally. +# Currently, disable gcc-11,12 array-bounds globally. # We may want to target only particular configurations some day. +config GCC11_NO_ARRAY_BOUNDS + def_bool y + config GCC12_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS # From e8988e878af693ac13b0fa80ba2e72d22d68f2dd Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 9 Jan 2023 10:39:06 -0600 Subject: [PATCH 10/17] net: sched: disallow noqueue for qdisc classes commit 96398560f26aa07e8f2969d73c8197e6a6d10407 upstream. 
While experimenting with applying noqueue to a classful queue discipline, we discovered a NULL pointer dereference in the __dev_queue_xmit() path that generates a kernel OOPS: # dev=enp0s5 # tc qdisc replace dev $dev root handle 1: htb default 1 # tc class add dev $dev parent 1: classid 1:1 htb rate 10mbit # tc qdisc add dev $dev parent 1:1 handle 10: noqueue # ping -I $dev -w 1 -c 1 1.1.1.1 [ 2.172856] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 2.173217] #PF: supervisor instruction fetch in kernel mode ... [ 2.178451] Call Trace: [ 2.178577] [ 2.178686] htb_enqueue+0x1c8/0x370 [ 2.178880] dev_qdisc_enqueue+0x15/0x90 [ 2.179093] __dev_queue_xmit+0x798/0xd00 [ 2.179305] ? _raw_write_lock_bh+0xe/0x30 [ 2.179522] ? __local_bh_enable_ip+0x32/0x70 [ 2.179759] ? ___neigh_create+0x610/0x840 [ 2.179968] ? eth_header+0x21/0xc0 [ 2.180144] ip_finish_output2+0x15e/0x4f0 [ 2.180348] ? dst_output+0x30/0x30 [ 2.180525] ip_push_pending_frames+0x9d/0xb0 [ 2.180739] raw_sendmsg+0x601/0xcb0 [ 2.180916] ? _raw_spin_trylock+0xe/0x50 [ 2.181112] ? _raw_spin_unlock_irqrestore+0x16/0x30 [ 2.181354] ? get_page_from_freelist+0xcd6/0xdf0 [ 2.181594] ? sock_sendmsg+0x56/0x60 [ 2.181781] sock_sendmsg+0x56/0x60 [ 2.181958] __sys_sendto+0xf7/0x160 [ 2.182139] ? handle_mm_fault+0x6e/0x1d0 [ 2.182366] ? do_user_addr_fault+0x1e1/0x660 [ 2.182627] __x64_sys_sendto+0x1b/0x30 [ 2.182881] do_syscall_64+0x38/0x90 [ 2.183085] entry_SYSCALL_64_after_hwframe+0x63/0xcd ... [ 2.187402] Previously in commit d66d6c3152e8 ("net: sched: register noqueue qdisc"), NULL was set for the noqueue discipline on noqueue init so that __dev_queue_xmit() falls through for the noqueue case. This also sets a bypass of the enqueue NULL check in the register_qdisc() function for the struct noqueue_disc_ops. 
Classful queue disciplines make it past the NULL check in __dev_queue_xmit() because the discipline is set to htb (in this case), and then in the call to __dev_xmit_skb(), it calls into htb_enqueue() which grabs a leaf node for a class and then calls qdisc_enqueue() by passing in a queue discipline which assumes ->enqueue() is not set to NULL. Fix this by not allowing classes to be assigned to the noqueue discipline. Linux TC Notes states that classes cannot be set to the noqueue discipline. [1] Let's enforce that here. Links: 1. https://linux-tc-notes.sourceforge.net/tc/doc/sch_noqueue.txt Fixes: d66d6c3152e8 ("net: sched: register noqueue qdisc") Cc: stable@vger.kernel.org Signed-off-by: Frederick Lawler Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20230109163906.706000-1-fred@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4a27dfb1ba0f..c82532e20699 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1132,6 +1132,11 @@ skip: return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, extack); if (err) return err; From d6ad4bd1d896ae1daffd7628cd50f124280fb8b1 Mon Sep 17 00:00:00 2001 From: Clement Lecigne Date: Fri, 13 Jan 2023 13:07:45 +0100 Subject: [PATCH 11/17] ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF commit 56b88b50565cd8b946a2d00b0c83927b7ebb055e upstream. Take the rwsem lock inside snd_ctl_elem_read instead of snd_ctl_elem_read_user, as was done for write in commit 1fa4445f9adf1 ("ALSA: control - introduce snd_ctl_notify_one() helper"). Doing it this way also fixes the following locking issue in the compat path, which can be easily triggered and turned into a use-after-free.
64-bits: snd_ctl_ioctl snd_ctl_elem_read_user [takes controls_rwsem] snd_ctl_elem_read [lock properly held, all good] [drops controls_rwsem] 32-bits: snd_ctl_ioctl_compat snd_ctl_elem_write_read_compat ctl_elem_write_read snd_ctl_elem_read [missing lock, not good] CVE-2023-0266 was assigned for this issue. Cc: stable@kernel.org # 5.13+ Signed-off-by: Clement Lecigne Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230113120745.25464-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index 50e7ba66f187..82aa1af1d1d8 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1203,14 +1203,19 @@ static int snd_ctl_elem_read(struct snd_card *card, const u32 pattern = 0xdeadbeef; int ret; + down_read(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &control->id); - if (kctl == NULL) - return -ENOENT; + if (kctl == NULL) { + ret = -ENOENT; + goto unlock; + } index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; - if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) - return -EPERM; + if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) { + ret = -EPERM; + goto unlock; + } snd_ctl_build_ioff(&control->id, kctl, index_offset); @@ -1220,7 +1225,7 @@ static int snd_ctl_elem_read(struct snd_card *card, info.id = control->id; ret = __snd_ctl_elem_info(card, kctl, &info, NULL); if (ret < 0) - return ret; + goto unlock; #endif if (!snd_ctl_skip_validation(&info)) @@ -1230,7 +1235,7 @@ static int snd_ctl_elem_read(struct snd_card *card, ret = kctl->get(kctl, control); snd_power_unref(card); if (ret < 0) - return ret; + goto unlock; if (!snd_ctl_skip_validation(&info) && sanity_check_elem_value(card, control, &info, pattern) < 0) { dev_err(card->dev, @@ -1238,8 +1243,11 @@ static int snd_ctl_elem_read(struct 
snd_card *card, control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index); - return -EINVAL; + ret = -EINVAL; + goto unlock; } +unlock: + up_read(&card->controls_rwsem); return ret; } @@ -1253,9 +1261,7 @@ static int snd_ctl_elem_read_user(struct snd_card *card, if (IS_ERR(control)) return PTR_ERR(control); - down_read(&card->controls_rwsem); result = snd_ctl_elem_read(card, control); - up_read(&card->controls_rwsem); if (result < 0) goto error; From 0e19738afc84fc2f8ed1ad1baa0373fcb8ebca36 Mon Sep 17 00:00:00 2001 From: Adrian Chan Date: Mon, 9 Jan 2023 16:05:20 -0500 Subject: [PATCH 12/17] ALSA: hda/hdmi: Add a HP device 0x8715 to force connect list commit de1ccb9e61728dd941fe0e955a7a129418657267 upstream. Add the 'HP Engage Flex Mini' device to the force connect list to enable audio through HDMI. Signed-off-by: Adrian Chan Cc: Link: https://lore.kernel.org/r/20230109210520.16060-1-adchan@google.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 386dd9d9143f..9ea633fe9339 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1981,6 +1981,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), + SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} From 57f6bea5218c37426ccf68b37fa66a0f476ef869 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Thu, 5 Jan 2023 12:41:53 +0800 Subject: [PATCH 13/17] ALSA: hda/realtek: fix mute/micmute LEDs don't work for a HP platform commit 9c694fbfe6f36017b060ad74c7565cb379852e40 upstream. There is an HP platform that uses the ALC236 codec, which uses GPIO2 to control the mute LED and GPIO1 to control the micmute LED.
Thus, add a quirk to make them work. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20230105044154.8242-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3794b522c222..9865cc5dd56c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9406,6 +9406,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 8107550bfe1189d7459292a66aab30a74c349cda Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 5 Jan 2023 10:35:30 +0100 Subject: [PATCH 14/17] ALSA: hda: cs35l41: Don't return -EINVAL from system suspend/resume commit 15a59cb0a3d6ddf2cb79f8dc3081b3130aad3767 upstream. The recent commit to support the system suspend for CS35L41 caused a regression on the models with the CS35L41_EXT_BOOST_NO_VSPK_SWITCH boost type, as the suspend/resume callbacks just return -EINVAL. This is eventually handled as a fatal error and blocks the whole system suspend/resume. To avoid the problem, this patch changes the return code of cs35l41_system_suspend() and _resume() to 0, and replaces dev_err() with dev_err_once() to avoid spamming the log.
Fixes: 88672826e2a4 ("ALSA: hda: cs35l41: Support System Suspend") Cc: Link: https://lore.kernel.org/all/e6751ac2-34f3-d13f-13db-8174fade8308@pm.me Link: https://lore.kernel.org/r/20230105093531.16960-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/cs35l41_hda.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index e5f0549bf06d..c64420d389b0 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -598,8 +598,8 @@ static int cs35l41_system_suspend(struct device *dev) dev_dbg(cs35l41->dev, "System Suspend\n"); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) { - dev_err(cs35l41->dev, "System Suspend not supported\n"); - return -EINVAL; + dev_err_once(cs35l41->dev, "System Suspend not supported\n"); + return 0; /* don't block the whole system suspend */ } ret = pm_runtime_force_suspend(dev); @@ -624,8 +624,8 @@ static int cs35l41_system_resume(struct device *dev) dev_dbg(cs35l41->dev, "System Resume\n"); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) { - dev_err(cs35l41->dev, "System Resume not supported\n"); - return -EINVAL; + dev_err_once(cs35l41->dev, "System Resume not supported\n"); + return 0; /* don't block the whole system resume */ } if (cs35l41->reset_gpio) { From a6d58eff4f8367af588c49cbceed28147b1030c7 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 3 Jan 2023 17:53:32 +0800 Subject: [PATCH 15/17] ALSA: hda - Enable headset mic on another Dell laptop with ALC3254 commit a5751933a7f6abbdad90d98f25a25bb4b133a9e6 upstream. There is another Dell Latitude laptop (1028:0c03) with Realtek codec ALC3254 which needs the ALC269_FIXUP_DELL4_MIC_NO_PRESENCE instead of the default matched ALC269_FIXUP_DELL1_MIC_NO_PRESENCE. Apply correct fixup for this particular model to enable headset mic. 
Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20230103095332.730677-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9865cc5dd56c..764eb07bbaff 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9239,6 +9239,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), From cb9b3951d6366c30f0085c9830381b1334cdcf62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 5 Jan 2023 10:35:31 +0100 Subject: [PATCH 16/17] ALSA: hda: cs35l41: Check runtime suspend capability at runtime_idle commit ae50e2ab122cef68f46b7799fb9deffe3334f5e2 upstream. The runtime PM core checks with the runtime_idle callback whether it can go to runtime suspend or not, so we can put the boost type check there instead of in the runtime_suspend and _resume calls. This will reduce unnecessary runtime_suspend() calls.
Fixes: 1873ebd30cc8 ("ALSA: hda: cs35l41: Support Hibernation during Suspend") Cc: Link: https://lore.kernel.org/r/20230105093531.16960-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/cs35l41_hda.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index c64420d389b0..a5b10a6a33a5 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -647,6 +647,15 @@ static int cs35l41_system_resume(struct device *dev) return ret; } +static int cs35l41_runtime_idle(struct device *dev) +{ + struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); + + if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) + return -EBUSY; /* suspend not supported yet on this model */ + return 0; +} + static int cs35l41_runtime_suspend(struct device *dev) { struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); @@ -1536,7 +1545,8 @@ void cs35l41_hda_remove(struct device *dev) EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41); const struct dev_pm_ops cs35l41_hda_pm_ops = { - RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, NULL) + RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, + cs35l41_runtime_idle) SYSTEM_SLEEP_PM_OPS(cs35l41_system_suspend, cs35l41_system_resume) }; EXPORT_SYMBOL_NS_GPL(cs35l41_hda_pm_ops, SND_HDA_SCODEC_CS35L41); From 38f3ee12661fdc2805e06942e4e3d604e03cd9cf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 14 Jan 2023 10:33:46 +0100 Subject: [PATCH 17/17] Linux 6.1.6 Link: https://lore.kernel.org/r/20230112135326.981869724@linuxfoundation.org Tested-by: Conor Dooley Tested-by: Ronald Warsow Tested-by: Shuah Khan Tested-by: Kelsey Steele Tested-by: Takeshi Ogasawara Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Sudip Mukherjee Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Allen Pais Tested-by: Rudi 
Heitbaum Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ddbd2fc917c5..19e8c6dec6e5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth