Re: [PATCH 21/28] KVM: x86/mmu: propagate access mask from root pages down

From: mlevitsk

Date: Tue Jun 02 2026 - 11:12:05 EST

On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> Until now, all SPTEs have had all kinds of access allowed; however,
> for GMET to be enabled all the pages have to have ACC_USER_MASK
> disabled. By marking them as supervisor pages, the processor
> allows execution from either user or supervisor mode (unlike
> for normal paging, NPT ignores the U bit for reads and writes).

Hi!

AFAIK, according to the AMD's APM, NPT forces U bit to be 1, so it might be worth it to
update the commit message to state that NPT ignores U bit when GMET is enabled.

"A page is considered user in the guest only if it's marked as user at the guest level.
The page must be marked user in the nested page table to allow any guest access at all."

I haven't found anything else about this in the APM (Rev. 3.44—March 2026).
Using common sense, I agree that with GMET enabled, the U bit must be ignored for read/writes though,
but once again, I haven't found anything about it in the APM.

> That will mean that the root page's role has ACC_USER_MASK
> cleared and that has to be propagated down through the kvm_mmu_page
> tree.
>
> Do that, and pass the required access to the
> kvm_mmu_spte_requested tracepoint since it's not ACC_ALL
> anymore.

It might be worth it to mention that other than changes to the tracepoints,
this patch doesn't yet change any functionality (everything is still ACC_ALL)

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>

Best regards,
Maxim Levitsky

>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
> arch/x86/kvm/mmu/mmu.c         | 9 +++++----
> arch/x86/kvm/mmu/mmutrace.h    | 10 ++++++----
> arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
> arch/x86/kvm/mmu/tdp_mmu.c     | 6 +++---
> 4 files changed, 15 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index ededc26c6675..156bab8afbc6 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3446,12 +3446,13 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> {
> struct kvm_shadow_walk_iterator it;
> struct kvm_mmu_page *sp;
> - int ret;
> + int ret, access;
> gfn_t base_gfn = fault->gfn;
>
> kvm_mmu_hugepage_adjust(vcpu, fault);
>
> - trace_kvm_mmu_spte_requested(fault);
> + access = vcpu->arch.mmu->root_role.access;
> + trace_kvm_mmu_spte_requested(fault, access);
> for_each_shadow_entry(vcpu, fault->addr, it) {
> /*
> * We cannot overwrite existing page tables with an NX
> @@ -3464,7 +3465,7 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> if (it.level == fault->goal_level)
> break;
>
> - sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, ACC_ALL);
> + sp = kvm_mmu_get_child_sp(vcpu, it.sptep, base_gfn, true, access);
> if (sp == ERR_PTR(-EEXIST))
> continue;
>
> @@ -3477,7 +3478,7 @@ static int direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
> if (WARN_ON_ONCE(it.level != fault->goal_level))
> return -EFAULT;
>
> - ret = mmu_set_spte(vcpu, fault->slot, it.sptep, ACC_ALL,
> + ret = mmu_set_spte(vcpu, fault->slot, it.sptep, access,
>    base_gfn, fault->pfn, fault);
> if (ret == RET_PF_SPURIOUS)
> return ret;
> diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
> index 3429c1413f42..fa01719baf8d 100644
> --- a/arch/x86/kvm/mmu/mmutrace.h
> +++ b/arch/x86/kvm/mmu/mmutrace.h
> @@ -373,23 +373,25 @@ TRACE_EVENT(
>
> TRACE_EVENT(
> kvm_mmu_spte_requested,
> - TP_PROTO(struct kvm_page_fault *fault),
> - TP_ARGS(fault),
> + TP_PROTO(struct kvm_page_fault *fault, u8 access),
> + TP_ARGS(fault, access),
>
> TP_STRUCT__entry(
> __field(u64, gfn)
> __field(u64, pfn)
> __field(u8, level)
> + __field(u8, access)
> ),
>
> TP_fast_assign(
> __entry->gfn = fault->gfn;
> __entry->pfn = fault->pfn | (fault->gfn & (KVM_PAGES_PER_HPAGE(fault->goal_level) - 1));
> __entry->level = fault->goal_level;
> + __entry->access = access;
> ),
>
> - TP_printk("gfn %llx pfn %llx level %d",
> - __entry->gfn, __entry->pfn, __entry->level
> + TP_printk("gfn %llx pfn %llx level %d access %x",
> + __entry->gfn, __entry->pfn, __entry->level, __entry->access
> )
> );
>
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index f741f7d4cc2d..047400af924d 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -734,7 +734,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
> */
> kvm_mmu_hugepage_adjust(vcpu, fault);
>
> - trace_kvm_mmu_spte_requested(fault);
> + trace_kvm_mmu_spte_requested(fault, gw->pte_access);
>
> for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
> /*
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 7b1102d26f9c..5a2f8ce9a32b 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1185,9 +1185,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
> }
>
> if (unlikely(!fault->slot))
> - new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
> + new_spte = make_mmio_spte(vcpu, iter->gfn, sp->role.access);
> else
> - wrprot = make_spte(vcpu, sp, fault->slot, ACC_ALL, iter->gfn,
> + wrprot = make_spte(vcpu, sp, fault->slot, sp->role.access, iter->gfn,
>    fault->pfn, iter->old_spte, fault->prefetch,
>    false, fault->map_writable, &new_spte);
>
> @@ -1272,7 +1272,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>
> kvm_mmu_hugepage_adjust(vcpu, fault);
>
> - trace_kvm_mmu_spte_requested(fault);
> + trace_kvm_mmu_spte_requested(fault, root->role.access);
>
> rcu_read_lock();
>