Re: [PATCH 11/28] KVM: x86/mmu: pass pte_access for final nGPA->GPA walk

From: mlevitsk

Date: Tue Jun 02 2026 - 10:34:12 EST


On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> The XS/XU bit for EPT are only applied to final accesses, and use the
> U bit from the page walk itself.  This is available in the page walker
> as pte_access & ACC_USER_MASK but not available to translate_nested_gpa,
> so pass it down.
>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/kvm/hyperv.c          |  2 +-
>  arch/x86/kvm/mmu.h             | 15 ++++++++++++---
>  arch/x86/kvm/mmu/mmu.c         |  8 +++++++-
>  arch/x86/kvm/mmu/paging_tmpl.h |  4 ++--
>  arch/x86/kvm/mmu/spte.h        |  6 ------
>  arch/x86/kvm/x86.c             |  5 +++--
>  6 files changed, 25 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index cf9dd565b894..53688f7b76eb 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2042,7 +2042,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>   */
>   if (!hc->fast && is_guest_mode(vcpu)) {
>   hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa,
> - PFERR_GUEST_FINAL_MASK, NULL);
> + PFERR_GUEST_FINAL_MASK, NULL, 0);
>   if (unlikely(hc->ingpa == INVALID_GPA))
>   return HV_STATUS_INVALID_HYPERCALL_INPUT;
>   }
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 23f37535c0ce..635c2e5d8513 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -37,6 +37,12 @@ extern bool __read_mostly enable_mmio_caching;
>  #define PT32_ROOT_LEVEL 2
>  #define PT32E_ROOT_LEVEL 3
>  
> +#define ACC_READ_MASK    PT_PRESENT_MASK
> +#define ACC_WRITE_MASK   PT_WRITABLE_MASK
> +#define ACC_USER_MASK    PT_USER_MASK
> +#define ACC_EXEC_MASK    8
> +#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
> +
>  #define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_LA57 | \
>          X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE)
>  
> @@ -289,16 +295,19 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
>  }
>  
>  gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> -    struct x86_exception *exception);
> +    struct x86_exception *exception,
> +    u64 pte_access);
>  
>  static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
>         struct kvm_mmu *mmu,
>         gpa_t gpa, u64 access,
> -       struct x86_exception *exception)
> +       struct x86_exception *exception,
> +       u64 pte_access)

Hi,

IMHO the fact that we have now both 'access' and 'pte_access' is confusing,
and yet we have to do it in this way because this is how MBE works.

I think this needs a one large comment justifying the whole thing, including
explanation on what pte_access is and that it is only defined for access that
translate the final guest memory access (it is 0 otherwise)

Maybe, I would go even further and suggest to replace the pte_access
with a boolean for the MBE only (bool mapped_as_user for example), 
hoping that MBE will be the only case that requires this, and
have a comment explaining this. I am not sure about this,
but I do think that a comment is warranted here.


Again, this is not our fault, but rather a result of MBE being defined like that.

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky



>  {
>   if (mmu != &vcpu->arch.nested_mmu)
>   return gpa;
> - return translate_nested_gpa(vcpu, gpa, access, exception);
> + return translate_nested_gpa(vcpu, gpa, access, exception,
> +     pte_access);
>  }
>  
>  static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 46412e4d207f..3dbac7ad044f 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4348,8 +4348,14 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  {
>   if (exception)
>   exception->error_code = 0;
> + /*
> + * EPT MBEC uses the effective access bits from the PTE to distinguish
> + * user and supervisor accesses, and treats every linear address as a
> + * user-mode address if CR0.PG=0.  Therefore *include* ACC_USER_MASK in
> + * the last argument to kvm_translate_gpa (which NPT does not use).
> + */
>   return kvm_translate_gpa(vcpu, mmu, vaddr, access | PFERR_GUEST_FINAL_MASK,
> - exception);
> + exception, ACC_ALL);
>  }
>  
>  static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index 567f8b77ffe0..8dd9d510fc34 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -377,7 +377,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>  
>   real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn),
>        nested_access | PFERR_GUEST_PAGE_MASK,
> -      &walker->fault);
> +      &walker->fault, 0);
>  
>   /*
>   * FIXME: This can happen if emulation (for of an INS/OUTS
> @@ -447,7 +447,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>  
>   real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn),
>        access | PFERR_GUEST_FINAL_MASK,
> -      &walker->fault);
> +      &walker->fault, walker->pte_access);
>   if (real_gpa == INVALID_GPA)
>   return 0;
>  
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 121bfb2217e8..8a4c09c5cdbf 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -52,12 +52,6 @@ static_assert(SPTE_TDP_AD_ENABLED == 0);
>  #define SPTE_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
>  #endif
>  
> -#define ACC_READ_MASK    PT_PRESENT_MASK
> -#define ACC_WRITE_MASK   PT_WRITABLE_MASK
> -#define ACC_USER_MASK    PT_USER_MASK
> -#define ACC_EXEC_MASK    8
> -#define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK | ACC_READ_MASK)
> -
>  #define SPTE_LEVEL_BITS 9
>  #define SPTE_LEVEL_SHIFT(level) __PT_LEVEL_SHIFT(level, SPTE_LEVEL_BITS)
>  #define SPTE_INDEX(address, level) __PT_INDEX(address, level, SPTE_LEVEL_BITS)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index ef1e3ae13887..67979b7de5d6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1073,7 +1073,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
>   */
>   real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
>        PFERR_USER_MASK | PFERR_WRITE_MASK |
> -      PFERR_GUEST_PAGE_MASK, NULL);
> +      PFERR_GUEST_PAGE_MASK, NULL, 0);
>   if (real_gpa == INVALID_GPA)
>   return 0;
>  
> @@ -7849,7 +7849,8 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
>  }
>  
>  gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access,
> -    struct x86_exception *exception)
> +    struct x86_exception *exception,
> +    u64 pte_access)
>  {
>   struct kvm_mmu *mmu = vcpu->arch.mmu;
>   gpa_t t_gpa;