From dd621869c1e65e93edd43388d10bf6c5fade3079 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 24 Jul 2023 19:54:05 +0100 Subject: [PATCH] BACKPORT: FROMGIT: mm: handle some PMD faults under the VMA lock Push the VMA_LOCK check down from __handle_mm_fault() to handle_pte_fault(). Once again, we refuse to call ->huge_fault() with the VMA lock held, but we will wait for a PMD migration entry with the VMA lock held, handle NUMA migration and set the accessed bit. We were already doing this for anonymous VMAs, so it should be safe. Link: https://lkml.kernel.org/r/20230724185410.1124082-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Arjun Roy Cc: Eric Dumazet Cc: Punit Agrawal Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton (cherry picked from commit b7b8f56db92f56ce812e305f84aef0404287b534 https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-unstable) [surenb: resolved merge conflicts in create_huge_pmd() and wp_huge_pmd()] Bug: 293665307 Change-Id: I3ec9042b2e39a5caf6b6f3a478bf9ba337012aa4 Signed-off-by: Suren Baghdasaryan --- mm/memory.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index de54cc5d3c75..2f1b1ad8c5e4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4861,33 +4861,45 @@ out_map: static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) { - if (vma_is_anonymous(vmf->vma)) + struct vm_area_struct *vma = vmf->vma; + if (vma_is_anonymous(vma)) return do_huge_pmd_anonymous_page(vmf); - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + if (vma->vm_ops->huge_fault) { + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + vma_end_read(vma); + return VM_FAULT_RETRY; + } + return vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + } return VM_FAULT_FALLBACK; } /* `inline' is required to avoid gcc 4.1.2 build error */ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf) { + struct vm_area_struct 
*vma = vmf->vma; const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; + vm_fault_t ret; - if (vma_is_anonymous(vmf->vma)) { + if (vma_is_anonymous(vma)) { if (likely(!unshare) && - userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd)) + userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) return handle_userfault(vmf, VM_UFFD_WP); return do_huge_pmd_wp_page(vmf); } - if (vmf->vma->vm_ops->huge_fault) { - vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + if (vma->vm_ops->huge_fault) { + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + vma_end_read(vma); + return VM_FAULT_RETRY; + } + ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); if (!(ret & VM_FAULT_FALLBACK)) return ret; } /* COW or write-notify handled on pte level: split pmd. */ - __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); + __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; } @@ -4957,6 +4969,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { pte_t entry; + if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) { + vma_end_read(vmf->vma); + return VM_FAULT_RETRY; + } + if (unlikely(pmd_none(*vmf->pmd))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may @@ -5120,11 +5137,6 @@ retry_pud: if (pud_trans_unstable(vmf.pud)) goto retry_pud; - if ((flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) { - vma_end_read(vma); - return VM_FAULT_RETRY; - } - if (pmd_none(*vmf.pmd) && hugepage_vma_check(vma, vm_flags, false, true, true)) { ret = create_huge_pmd(&vmf);