ANDROID: KVM: arm64: pKVM module loading before deprivilege

kvm-arm.protected_modules="" takes a list of modules that pKVM will load
before de-privileging the host. This is necessary as no loading will be
allowed later.

We can't rely on request_module() that might be disabled by umh's
configuration. Instead, create our own version, locked by the pKVM/KVM
static keys and marked as __init to be cleared once the kernel init is
done. Belt and braces.

Keep the previous kvm-arm.protected_modules for compatibility.

Bug: 254835242
Change-Id: Ia6881b4c7a60cf81d19ead12c5d4638a27eff3eb
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
This commit is contained in:
Vincent Donnefort 2022-11-18 14:01:04 +00:00
parent e3ad2ce3fb
commit 78eb724c37
8 changed files with 179 additions and 41 deletions

View file

@ -2561,6 +2561,13 @@
mode to "protected" will disable kexec and hibernation
for the host.
kvm-arm.protected_modules=
[KVM,ARM] List of pKVM modules to load before the host
is deprivileged.
This option only applies when booting with
kvm-arm.mode=protected.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM] Trap guest accesses to GICv3 group-0
system registers

View file

@ -62,6 +62,8 @@ static inline int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
}
#endif /* CONFIG_MODULES */
int pkvm_load_early_modules(void);
#ifdef MODULE
/*
* Convert an EL2 module addr from the kernel VA to the hyp VA

View file

@ -16,7 +16,6 @@
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
@ -48,7 +47,6 @@
#include <kvm/arm_psci.h>
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
@ -2234,38 +2232,6 @@ out_err:
return err;
}
static void _kvm_host_prot_finalize(void *arg)
{
int *err = arg;
if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
WRITE_ONCE(*err, -EINVAL);
}
static int pkvm_drop_host_privileges(void)
{
int ret = 0;
/*
* Flip the static key upfront as that may no longer be possible
* once the host stage 2 is installed.
*/
static_branch_enable(&kvm_protected_mode_initialized);
/*
* Fixup the boot mode so that we don't take spurious round
* trips via EL2 on cpu_resume. Flush to the PoC for a good
* measure, so that it can be observed by a CPU coming out of
* suspend with the MMU off.
*/
__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
dcache_clean_poc((unsigned long)__boot_cpu_mode,
(unsigned long)(__boot_cpu_mode + 2));
on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
return ret;
}
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
struct kvm_vcpu *vcpu;

View file

@ -17,7 +17,8 @@ int reset_pkvm_priv_hcall_limit(void);
int __pkvm_init_module(void *module_init);
int __pkvm_register_hcall(unsigned long hfn_hyp_va);
int handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt);
int __pkvm_close_module_registration(void);
int __pkvm_close_late_module_registration(void);
void __pkvm_close_module_registration(void);
#else
static inline int __pkvm_init_module(void *module_init) { return -EOPNOTSUPP; }
static inline int
@ -26,5 +27,6 @@ static inline int handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt)
{
return HCALL_UNHANDLED;
}
static inline int __pkvm_close_module_registration(void) { return -EOPNOTSUPP; }
static inline int __pkvm_close_late_module_registration(void) { return -EOPNOTSUPP; }
static inline void __pkvm_close_module_registration(void) { }
#endif

View file

@ -1217,7 +1217,7 @@ static void handle___pkvm_register_hcall(struct kvm_cpu_context *host_ctxt)
static void
handle___pkvm_close_module_registration(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_close_module_registration();
cpu_reg(host_ctxt, 1) = __pkvm_close_late_module_registration();
}
static void handle___pkvm_load_tracing(struct kvm_cpu_context *host_ctxt)

View file

@ -21,6 +21,7 @@
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/modules.h>
#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
@ -386,6 +387,8 @@ int __pkvm_prot_finalize(void)
dsb(nsh);
isb();
__pkvm_close_module_registration();
return 0;
}

View file

@ -42,14 +42,25 @@ static void __pkvm_linear_unmap_early(void *addr, size_t size)
atomic_sub(size, &early_lm_pages);
}
int __pkvm_close_module_registration(void)
void __pkvm_close_module_registration(void)
{
/*
* Page ownership tracking might go out of sync if there are stale
* entries in pKVM's linear map range, so they must really be gone by
* now.
*/
WARN_ON(atomic_read(&early_lm_pages));
WARN_ON_ONCE(atomic_read(&early_lm_pages));
/*
* Nothing else to do, module loading HVCs are only accessible before
* deprivilege
*/
}
int __pkvm_close_late_module_registration(void)
{
__pkvm_close_module_registration();
return reset_pkvm_priv_hcall_limit();
/* The fuse is blown! No way back until reset */

View file

@ -5,6 +5,7 @@
*/
#include <linux/io.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <linux/memblock.h>
#include <linux/mm.h>
@ -18,9 +19,12 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pkvm_module.h>
#include <asm/setup.h>
#include "hyp_constants.h"
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
static struct reserved_mem *pkvm_firmware_mem;
static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);
@ -448,6 +452,38 @@ static int __init pkvm_firmware_rmem_clear(void)
return 0;
}
static void _kvm_host_prot_finalize(void *arg)
{
int *err = arg;
if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
WRITE_ONCE(*err, -EINVAL);
}
static int pkvm_drop_host_privileges(void)
{
int ret = 0;
/*
* Flip the static key upfront as that may no longer be possible
* once the host stage 2 is installed.
*/
static_branch_enable(&kvm_protected_mode_initialized);
/*
* Fixup the boot mode so that we don't take spurious round
* trips via EL2 on cpu_resume. Flush to the PoC for a good
* measure, so that it can be observed by a CPU coming out of
* suspend with the MMU off.
*/
__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
dcache_clean_poc((unsigned long)__boot_cpu_mode,
(unsigned long)(__boot_cpu_mode + 2));
on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
return ret;
}
static int __init finalize_pkvm(void)
{
int ret;
@ -457,6 +493,13 @@ static int __init finalize_pkvm(void)
return 0;
}
/*
* Modules can play an essential part in the pKVM protection. All of
* them must properly load to enable protected VMs.
*/
if (pkvm_load_early_modules())
pkvm_firmware_rmem_clear();
/*
* Exclude HYP sections from kmemleak so that they don't get peeked
* at, which would end badly once inaccessible.
@ -526,10 +569,15 @@ int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
}
#ifdef CONFIG_MODULES
static int __init early_pkvm_enable_modules(char *arg)
static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;
static int __init pkvm_enable_module_late_loading(void)
{
extern unsigned long kvm_nvhe_sym(pkvm_priv_hcall_limit);
WARN(1, "Loading pKVM modules with kvm-arm.protected_modules is deprecated\n"
"Use kvm-arm.protected_modules=<module1>,<module2>");
/*
* Move the limit to allow module loading HVCs. It will be moved back to
* its original position in __pkvm_close_module_registration().
@ -538,7 +586,106 @@ static int __init early_pkvm_enable_modules(char *arg)
return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_enable_modules);
static int __init early_pkvm_modules_cfg(char *arg)
{
if (!arg)
return pkvm_enable_module_late_loading();
strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);
return 0;
}
early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);
static void free_modprobe_argv(struct subprocess_info *info)
{
kfree(info->argv[3]);
kfree(info->argv);
}
/*
 * Heavily inspired by request_module(). The latter couldn't be reused though as
 * the feature can be disabled depending on umh configuration. Here some
 * security is enforced by making sure this can be called only when pKVM is
 * enabled but not yet completely initialized.
 */
static int __init pkvm_request_early_module(char *module_name)
{
char *modprobe_path = CONFIG_MODPROBE_PATH;
struct subprocess_info *info;
static char *envp[] = {
"HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL
};
char **argv;
if (!is_protected_kvm_enabled())
return -EACCES;
if (static_branch_likely(&kvm_protected_mode_initialized))
return -EACCES;
argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
if (!argv)
return -ENOMEM;
module_name = kstrdup(module_name, GFP_KERNEL);
if (!module_name)
goto free_argv;
argv[0] = modprobe_path;
argv[1] = "-q";
argv[2] = "--";
argv[3] = module_name;
argv[4] = NULL;
info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
NULL, free_modprobe_argv, NULL);
if (!info)
goto free_module_name;
/* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */
info->path = modprobe_path;
return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE);
free_module_name:
kfree(module_name);
free_argv:
kfree(argv);
return -ENOMEM;
}
int __init pkvm_load_early_modules(void)
{
char *token, *buf = early_pkvm_modules;
int err;
while (true) {
token = strsep(&buf, ",");
if (!token)
break;
if (*token) {
err = pkvm_request_early_module(token);
if (err) {
pr_err("Failed to load pkvm module %s: %d\n",
token, err);
return err;
}
}
if (buf)
*(buf - 1) = ',';
}
return 0;
}
struct pkvm_mod_sec_mapping {
struct pkvm_module_section *sec;