From 78eb724c376379f89ca8a9c7bbc098043dc76990 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort
Date: Fri, 18 Nov 2022 14:01:04 +0000
Subject: [PATCH] ANDROID: KVM: arm64: pKVM module loading before deprivilege

kvm-arm.protected_modules="" takes a list of modules that pKVM will
load before de-privileging the host. This is necessary as no loading
will be allowed later.

We can't rely on request_module(), which might be disabled by the umh
configuration. Instead, create our own version, locked by the pKVM/KVM
static keys and marked as __init so that it is discarded once kernel
init is done. Belt and braces.

Keep the previous kvm-arm.protected_modules behaviour (no argument) for
compatibility.

Bug: 254835242
Change-Id: Ia6881b4c7a60cf81d19ead12c5d4638a27eff3eb
Signed-off-by: Vincent Donnefort
---
 .../admin-guide/kernel-parameters.txt     |   7 +
 arch/arm64/include/asm/kvm_pkvm_module.h  |   2 +
 arch/arm64/kvm/arm.c                      |  34 ----
 arch/arm64/kvm/hyp/include/nvhe/modules.h |   6 +-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c        |   2 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c     |   3 +
 arch/arm64/kvm/hyp/nvhe/modules.c         |  15 +-
 arch/arm64/kvm/pkvm.c                     | 151 +++++++++++++++++-
 8 files changed, 179 insertions(+), 41 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d81bf9e55a91..ccc8b9a216ae 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2561,6 +2561,13 @@
 			mode to "protected" will disable kexec and hibernation
 			for the host.
 
+	kvm-arm.protected_modules=
+			[KVM,ARM] List of pKVM modules to load before the host
+			is deprivileged.
+
+			This option only applies when booting with
+			kvm-arm.mode=protected.
+
 	kvm-arm.vgic_v3_group0_trap=
 			[KVM,ARM] Trap guest accesses to GICv3 group-0
 			system registers

diff --git a/arch/arm64/include/asm/kvm_pkvm_module.h b/arch/arm64/include/asm/kvm_pkvm_module.h
index a3f80bee9a5a..3dcc02bcd587 100644
--- a/arch/arm64/include/asm/kvm_pkvm_module.h
+++ b/arch/arm64/include/asm/kvm_pkvm_module.h
@@ -62,6 +62,8 @@ static inline int __pkvm_register_el2_call(unsigned long hfn_hyp_va)
 }
 #endif /* CONFIG_MODULES */
 
+int pkvm_load_early_modules(void);
+
 #ifdef MODULE
 /*
  * Convert an EL2 module addr from the kernel VA to the hyp VA

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index ec41142d8899..2c431074eef7 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -16,7 +16,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -48,7 +47,6 @@
 #include
 
 static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
-DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
@@ -2234,38 +2232,6 @@ out_err:
 	return err;
 }
 
-static void _kvm_host_prot_finalize(void *arg)
-{
-	int *err = arg;
-
-	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
-		WRITE_ONCE(*err, -EINVAL);
-}
-
-static int pkvm_drop_host_privileges(void)
-{
-	int ret = 0;
-
-	/*
-	 * Flip the static key upfront as that may no longer be possible
-	 * once the host stage 2 is installed.
-	 */
-	static_branch_enable(&kvm_protected_mode_initialized);
-
-	/*
-	 * Fixup the boot mode so that we don't take spurious round
-	 * trips via EL2 on cpu_resume. Flush to the PoC for a good
-	 * measure, so that it can be observed by a CPU coming out of
-	 * suspend with the MMU off.
-	 */
-	__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
-	dcache_clean_poc((unsigned long)__boot_cpu_mode,
-			 (unsigned long)(__boot_cpu_mode + 2));
-
-	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
-	return ret;
-}
-
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 {
 	struct kvm_vcpu *vcpu;

diff --git a/arch/arm64/kvm/hyp/include/nvhe/modules.h b/arch/arm64/kvm/hyp/include/nvhe/modules.h
index 6d95d5f015f8..0c3755ebe201 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/modules.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/modules.h
@@ -17,7 +17,8 @@ int reset_pkvm_priv_hcall_limit(void);
 int __pkvm_init_module(void *module_init);
 int __pkvm_register_hcall(unsigned long hfn_hyp_va);
 int handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt);
-int __pkvm_close_module_registration(void);
+int __pkvm_close_late_module_registration(void);
+void __pkvm_close_module_registration(void);
 #else
 static inline int __pkvm_init_module(void *module_init) { return -EOPNOTSUPP; }
 static inline int
@@ -26,5 +27,6 @@ static inline int
 handle_host_dynamic_hcall(struct kvm_cpu_context *host_ctxt)
 {
 	return HCALL_UNHANDLED;
 }
-static inline int __pkvm_close_module_registration(void) { return -EOPNOTSUPP; }
+static inline int __pkvm_close_late_module_registration(void) { return -EOPNOTSUPP; }
+static inline void __pkvm_close_module_registration(void) { }
 #endif

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a70b039d5508..ab34c831f824 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -1217,7 +1217,7 @@ static void handle___pkvm_register_hcall(struct kvm_cpu_context *host_ctxt)
 
 static void handle___pkvm_close_module_registration(struct kvm_cpu_context *host_ctxt)
 {
-	cpu_reg(host_ctxt, 1) = __pkvm_close_module_registration();
+	cpu_reg(host_ctxt, 1) = __pkvm_close_late_module_registration();
 }
 
 static void handle___pkvm_load_tracing(struct kvm_cpu_context *host_ctxt)

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 74ef0d062719..4590e3d9d8dd 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 
 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
 
@@ -386,6 +387,8 @@ int __pkvm_prot_finalize(void)
 	dsb(nsh);
 	isb();
 
+	__pkvm_close_module_registration();
+
 	return 0;
 }

diff --git a/arch/arm64/kvm/hyp/nvhe/modules.c b/arch/arm64/kvm/hyp/nvhe/modules.c
index eb7b6680f2d1..be2a4db50017 100644
--- a/arch/arm64/kvm/hyp/nvhe/modules.c
+++ b/arch/arm64/kvm/hyp/nvhe/modules.c
@@ -42,14 +42,25 @@ static void __pkvm_linear_unmap_early(void *addr, size_t size)
 	atomic_sub(size, &early_lm_pages);
 }
 
-int __pkvm_close_module_registration(void)
+void __pkvm_close_module_registration(void)
 {
 	/*
	 * Page ownership tracking might go out of sync if there are stale
	 * entries in pKVM's linear map range, so they must really be gone by
	 * now.
	 */
-	WARN_ON(atomic_read(&early_lm_pages));
+	WARN_ON_ONCE(atomic_read(&early_lm_pages));
+
+	/*
+	 * Nothing else to do: module loading HVCs are only accessible before
+	 * deprivilege.
+	 */
+}
+
+int __pkvm_close_late_module_registration(void)
+{
+	__pkvm_close_module_registration();
+
+	/* The fuse is blown! No way back until reset */
+	return reset_pkvm_priv_hcall_limit();
+}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 1672c50fa1d1..ae3077f1a07d 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -5,6 +5,7 @@
  */
 
 #include
+#include
 #include
 #include
 #include
@@ -18,9 +19,12 @@
 #include
 #include
 #include
+#include
 
 #include "hyp_constants.h"
 
+DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
 static struct reserved_mem *pkvm_firmware_mem;
 static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base);
 static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size);
@@ -448,6 +452,38 @@ static int __init pkvm_firmware_rmem_clear(void)
 	return 0;
 }
 
+static void _kvm_host_prot_finalize(void *arg)
+{
+	int *err = arg;
+
+	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
+		WRITE_ONCE(*err, -EINVAL);
+}
+
+static int pkvm_drop_host_privileges(void)
+{
+	int ret = 0;
+
+	/*
+	 * Flip the static key upfront as that may no longer be possible
+	 * once the host stage 2 is installed.
+	 */
+	static_branch_enable(&kvm_protected_mode_initialized);
+
+	/*
+	 * Fixup the boot mode so that we don't take spurious round
+	 * trips via EL2 on cpu_resume. Flush to the PoC for a good
+	 * measure, so that it can be observed by a CPU coming out of
+	 * suspend with the MMU off.
+	 */
+	__boot_cpu_mode[0] = __boot_cpu_mode[1] = BOOT_CPU_MODE_EL1;
+	dcache_clean_poc((unsigned long)__boot_cpu_mode,
+			 (unsigned long)(__boot_cpu_mode + 2));
+
+	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
+	return ret;
+}
+
 static int __init finalize_pkvm(void)
 {
 	int ret;
@@ -457,6 +493,13 @@
 		return 0;
 	}
 
+	/*
+	 * Modules can play an essential part in the pKVM protection. All of
+	 * them must load successfully to enable protected VMs.
+	 */
+	if (pkvm_load_early_modules())
+		pkvm_firmware_rmem_clear();
+
 	/*
 	 * Exclude HYP sections from kmemleak so that they don't get peeked
 	 * at, which would end badly once inaccessible.
@@ -526,10 +569,15 @@ int pkvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 }
 
 #ifdef CONFIG_MODULES
-static int __init early_pkvm_enable_modules(char *arg)
+static char early_pkvm_modules[COMMAND_LINE_SIZE] __initdata;
+
+static int __init pkvm_enable_module_late_loading(void)
 {
 	extern unsigned long kvm_nvhe_sym(pkvm_priv_hcall_limit);
 
+	WARN(1, "Loading pKVM modules with kvm-arm.protected_modules is deprecated\n"
+		"Use kvm-arm.protected_modules=<module1>,<module2>");
+
 	/*
 	 * Move the limit to allow module loading HVCs. It will be moved back to
 	 * its original position in __pkvm_close_module_registration().
@@ -538,7 +586,106 @@
 
 	return 0;
 }
-early_param("kvm-arm.protected_modules", early_pkvm_enable_modules);
+
+static int __init early_pkvm_modules_cfg(char *arg)
+{
+	if (!arg)
+		return pkvm_enable_module_late_loading();
+
+	strscpy(early_pkvm_modules, arg, COMMAND_LINE_SIZE);
+
+	return 0;
+}
+early_param("kvm-arm.protected_modules", early_pkvm_modules_cfg);
+
+static void free_modprobe_argv(struct subprocess_info *info)
+{
+	kfree(info->argv[3]);
+	kfree(info->argv);
+}
+
+/*
+ * Heavily inspired by request_module(). The latter couldn't be reused though,
+ * as that feature can be disabled depending on the umh configuration. Here,
+ * some security is enforced by making sure this can be called only while pKVM
+ * is enabled but not yet completely initialized.
+ */ +static int __init pkvm_request_early_module(char *module_name) +{ + char *modprobe_path = CONFIG_MODPROBE_PATH; + struct subprocess_info *info; + static char *envp[] = { + "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL + }; + char **argv; + + if (!is_protected_kvm_enabled()) + return -EACCES; + + if (static_branch_likely(&kvm_protected_mode_initialized)) + return -EACCES; + + argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); + if (!argv) + return -ENOMEM; + + module_name = kstrdup(module_name, GFP_KERNEL); + if (!module_name) + goto free_argv; + + argv[0] = modprobe_path; + argv[1] = "-q"; + argv[2] = "--"; + argv[3] = module_name; + argv[4] = NULL; + + info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, + NULL, free_modprobe_argv, NULL); + if (!info) + goto free_module_name; + + /* Even with CONFIG_STATIC_USERMODEHELPER we really want this path */ + info->path = modprobe_path; + + return call_usermodehelper_exec(info, UMH_WAIT_PROC | UMH_KILLABLE); + +free_module_name: + kfree(module_name); +free_argv: + kfree(argv); + + return -ENOMEM; +} + +int __init pkvm_load_early_modules(void) +{ + char *token, *buf = early_pkvm_modules; + int err; + + while (true) { + token = strsep(&buf, ","); + + if (!token) + break; + + if (*token) { + err = pkvm_request_early_module(token); + if (err) { + pr_err("Failed to load pkvm module %s: %d\n", + token, err); + return err; + } + } + + if (buf) + *(buf - 1) = ','; + } + + return 0; +} struct pkvm_mod_sec_mapping { struct pkvm_module_section *sec;
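
Usage sketch (the module names below are hypothetical, purely for
illustration): with this patch applied, booting with

    kvm-arm.mode=protected kvm-arm.protected_modules=pkvm_foo,pkvm_bar

makes finalize_pkvm() hand each listed module to modprobe
(CONFIG_MODPROBE_PATH) before the host is deprivileged; if any of them
fails to load, pkvm_firmware_rmem_clear() is called and protected VMs
won't be usable. Passing the parameter with no argument keeps the
deprecated behaviour of re-opening the module loading HVCs for late
loading, with a WARN pointing at the new syntax.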
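For review purposes, here is a minimal, self-contained user-space model
of the list walk performed by pkvm_load_early_modules(): strsep() cuts
the buffer at each ',' and the separator is restored right after, so
the __initdata buffer can be parsed again by a later call. The stub
below stands in for pkvm_request_early_module() and is invented for
this sketch; it is not part of the patch.

    /* cc -o parse parse.c && ./parse */
    #define _DEFAULT_SOURCE
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for pkvm_request_early_module(); always "succeeds". */
    static int request_module_stub(const char *name)
    {
    	printf("modprobe -q -- %s\n", name);
    	return 0;
    }

    int main(void)
    {
    	char list[] = "pkvm_foo,,pkvm_bar";	/* empty tokens are skipped */
    	char *buf = list, *token;

    	while ((token = strsep(&buf, ",")) != NULL) {
    		if (*token && request_module_stub(token))
    			return 1;

    		/* strsep() wrote '\0' over the ','; put it back */
    		if (buf)
    			*(buf - 1) = ',';
    	}

    	printf("buffer intact again: %s\n", list);
    	return 0;
    }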