Re: [PATCH 11/28] KVM: x86/mmu: pass pte_access for final nGPA->GPA walk
From: mlevitsk
Date: Tue Jun 02 2026 - 10:34:12 EST
On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> The XS/XU bit for EPT are only applied to final accesses, and use the
> U bit from the page walk itself. This is available in the page walker
> as pte_access & ACC_USER_MASK but not available to translate_nested_gpa,
> so pass it down.
>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/hyperv.c | 2 +-
> arch/x86/kvm/mmu.h | 15 ++++++++++++---
> arch/x86/kvm/mmu/mmu.c | 8 +++++++-
> arch/x86/kvm/mmu/paging_tmpl.h | 4 ++--
> arch/x86/kvm/mmu/spte.h | 6 ------
> arch/x86/kvm/x86.c | 5 +++--
> 6 files changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index cf9dd565b894..53688f7b76eb 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2042,7 +2042,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> */
> if (!hc->fast && is_guest_mode(vcpu)) {
> hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa,
> - PFERR_GUEST_FINAL_MASK, NULL);
> + PFERR_GUEST_FINAL_MASK, NULL, 0);
> if (unlikely(hc->ingpa == INVALID_GPA))
> return HV_STATUS_INVALID_HYPERCALL_INPUT;
> }
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 23f37535c0ce..635c2e5d8513 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -37,6 +37,12 @@ extern bool __read_mostly enable_mmio_caching;
> #define PT32_ROOT_LEVEL 2
> #define PT32E_ROOT_LEVEL 3
>
> +#define ACC_READ_MASK PT_PRESENT_MASK
> +#define ACC_WRITE_MASK PT_WRITABLE_MASK
> +#define ACC_USER_MASK PT_USER_MASK
> +#define ACC_EXEC_MASK 8
> +#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
> +
> #define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_LA57 | \
> X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
>
> @@ -289,16 +295,19 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
> }
>
> gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> - struct x86_exception *exception);
> + struct x86_exception *exception,
> + u64 pte_access);
>
> static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
> struct kvm_mmu *mmu,
> gpa_t gpa, u64 access,
> - struct x86_exception *exception)
> + struct x86_exception *exception,
> + u64 pte_access)
Hi,
IMHO the fact that we have now both 'access' and 'pte_access' is confusing,
and yet we have to do it in this way because this is how MBE works.
I think this needs a one large comment justifying the whole thing, including
explanation on what pte_access is and that it is only defined for access that
translate the final guest memory access (it is 0 otherwise)
Maybe, I would go even further and suggest to replace the pte_access
with a boolean for the MBE only (bool mapped_as_user for example),
hoping that MBE will be the only case that requires this, and
have a comment explaining this. I am not sure about this,
but I do think that a comment is warranted here.
Again, this is not our fault, but rather a result of MBE being defined like that.
Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky
> {
> if (mmu != &vcpu->arch.nested_mmu)
> return gpa;
> - return translate_nested_gpa(vcpu, gpa, access, exception);
> + return translate_nested_gpa(vcpu, gpa, access, exception,
> + pte_access);
> }
>
> static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 46412e4d207f..3dbac7ad044f 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4348,8 +4348,14 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> {
> if (exception)
> exception->error_code = 0;
> + /*
> + * EPT MBEC uses the effective access bits from the PTE to distinguish
> + * user and supervisor accesses, and treats every linear address as a
> + * user-mode address if CR0.PG=0. Therefore *include* ACC_USER_MASK in
> + * the last argument to kvm_translate_gpa (which NPT does not use).
> + */
> return kvm_translate_gpa(vcpu, mmu, vaddr, access | PFERR_GUEST_FINAL_MASK,
> - exception);
> + exception, ACC_ALL);
> }
>
> static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 567f8b77ffe0..8dd9d510fc34 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -377,7 +377,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>
> real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn),
> nested_access | PFERR_GUEST_PAGE_MASK,
> - &walker->fault);
> + &walker->fault, 0);
>
> /*
> * FIXME: This can happen if emulation (for of an INS/OUTS
> @@ -447,7 +447,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>
> real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn),
> access | PFERR_GUEST_FINAL_MASK,
> - &walker->fault);
> + &walker->fault, walker->pte_access);
> if (real_gpa == INVALID_GPA)
> return 0;
>
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 121bfb2217e8..8a4c09c5cdbf 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -52,12 +52,6 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
> #define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
> #endif
>
> -#define ACC_READ_MASK PT_PRESENT_MASK
> -#define ACC_WRITE_MASK PT_WRITABLE_MASK
> -#define ACC_USER_MASK PT_USER_MASK
> -#define ACC_EXEC_MASK 8
> -#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
> -
> #define SPTE_LEVEL_BITS 9
> #define SPTE_LEVEL_SHIFT(level) __PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS)
> #define SPTE_INDEX(address, level) __PT_INDEX(address, level, SPTE_LEVEL_BITS)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ef1e3ae13887..67979b7de5d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1073,7 +1073,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
> */
> real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
> PFERR_USER_MASK | PFERR_WRITE_MASK |
> - PFERR_GUEST_PAGE_MASK, NULL);
> + PFERR_GUEST_PAGE_MASK, NULL, 0);
> if (real_gpa == INVALID_GPA)
> return 0;
>
> @@ -7849,7 +7849,8 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
> }
>
> gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> - struct x86_exception *exception)
> + struct x86_exception *exception,
> + u64 pte_access)
> {
> struct kvm_mmu *mmu = vcpu->arch.mmu;
> gpa_t t_gpa;