Re: [PATCH V3 3/4] KVM: VMX: Fix nested EPT violation injection of GVA_IS_VALID/GVA_TRANSLATED bits

From: Sean Christopherson

Date: Fri May 22 2026 - 18:17:11 EST


On Fri, Mar 13, 2026, Kevin Cheng wrote:
> Make the OR of EPT_VIOLATION_GVA_IS_VALID and
> EPT_VIOLATION_GVA_TRANSLATED from the hardware exit qualification
> conditional on the fault originating from a hardware EPT violation
> exit. The hardware exit qualification reflects the original VM exit,
> which may not be an EPT violation at all, e.g. if KVM is emulating
> an I/O instruction and the memory operand's translation through L1's
> EPT fails. In that case, bits 7-8 of the exit qualification have
> completely different semantics (or are simply zero), and OR'ing them
> into the injected EPT violation corrupts the GVA_IS_VALID/
> GVA_TRANSLATED information.
>
> Use the hardware_nested_page_fault flag introduced in the previous
> patch to distinguish hardware EPT violation exits from
> emulation-triggered faults. For hardware exits, take the
> GVA_IS_VALID/GVA_TRANSLATED bits from the hardware exit qualification.
> For emulation faults, take them from fault->exit_qualification, which
> is populated by the nested_mmu walker in paging_tmpl.h.
>
> Replace the #if PTTYPE != PTTYPE_EPT preprocessor guards in
> paging_tmpl.h with a runtime kvm_nested_fault_is_ept() helper that
> checks guest_mmu to determine whether the nested fault is EPT vs NPT,
> and sets the appropriate field (exit_qualification for EPT, error_code
> for NPF) accordingly.

Same comments on the changelog.

> Signed-off-by: Kevin Cheng <chengkev@xxxxxxxxxx>
> ---
> arch/x86/kvm/mmu/mmu.c | 10 ++++++++++
> arch/x86/kvm/mmu/paging_tmpl.h | 22 +++++++++++++++-------
> arch/x86/kvm/vmx/nested.c | 9 +++++----
> 3 files changed, 30 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 3dce38ffee76..aabf4ac39c43 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5272,6 +5272,9 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
> return false;
> }
>
> +static bool kvm_nested_fault_is_ept(struct kvm_vcpu *vcpu,
> + struct x86_exception *exception);
> +
> #define PTTYPE_EPT 18 /* arbitrary */
> #define PTTYPE PTTYPE_EPT
> #include "paging_tmpl.h"
> @@ -5285,6 +5288,13 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
> #include "paging_tmpl.h"
> #undef PTTYPE
>
> +static bool kvm_nested_fault_is_ept(struct kvm_vcpu *vcpu,
> + struct x86_exception *exception)
> +{
> + WARN_ON_ONCE(!exception->nested_page_fault);
> + return vcpu->arch.guest_mmu.page_fault == ept_page_fault;

Happily, on top of the MBEC+GMET support, this goes away.

> +}
> +
> static void __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
> u64 pa_bits_rsvd, int level, bool nx,
> bool gbpages, bool pse, bool amd)
> diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
> index ea2b7569f8a4..15be93d735ab 100644
> --- a/arch/x86/kvm/mmu/paging_tmpl.h
> +++ b/arch/x86/kvm/mmu/paging_tmpl.h
> @@ -386,9 +386,15 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> nested_access, &walker->fault);
>
> if (unlikely(real_gpa == INVALID_GPA)) {
> -#if PTTYPE != PTTYPE_EPT
> - walker->fault.error_code |= PFERR_GUEST_PAGE_MASK;
> -#endif
> + /*
> + * Set EPT Violation flags even if the fault is an
> + * EPT Misconfig, fault.exit_qualification is ignored
> + * for EPT Misconfigs.
> + */
> + if (kvm_nested_fault_is_ept(vcpu, &walker->fault))
> + walker->fault.exit_qualification |= EPT_VIOLATION_GVA_IS_VALID;
> + else
> + walker->fault.error_code |= PFERR_GUEST_PAGE_MASK;
> return 0;
> }
>
> @@ -447,9 +453,11 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>
> real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn), access, &walker->fault);
> if (real_gpa == INVALID_GPA) {
> -#if PTTYPE != PTTYPE_EPT
> - walker->fault.error_code |= PFERR_GUEST_FINAL_MASK;
> -#endif
> + if (kvm_nested_fault_is_ept(vcpu, &walker->fault))
> + walker->fault.exit_qualification |= EPT_VIOLATION_GVA_IS_VALID |
> + EPT_VIOLATION_GVA_TRANSLATED;
> + else
> + walker->fault.error_code |= PFERR_GUEST_FINAL_MASK;
> return 0;

And these become:

diff --git arch/x86/kvm/mmu/paging_tmpl.h arch/x86/kvm/mmu/paging_tmpl.h
index 5b2410ed7e45..b3a2f7b59797 100644
--- arch/x86/kvm/mmu/paging_tmpl.h
+++ arch/x86/kvm/mmu/paging_tmpl.h
@@ -502,7 +502,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
* [2:0] - Derive from the access bits. The exit_qualification might be
* out of date if it is serving an EPT misconfiguration.
* [5:3] - Calculated by the page walk of the guest EPT page tables
- * [7:11] - Derived from [7:11] of real exit_qualification
+ * [7:8] - Derived from "fault stage" access bits
+ * [9:11] - Derived from [9:11] of real exit_qualification
*
* The other bits are set to 0.
*/
@@ -516,6 +517,14 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
else
walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;

+ /*
+ * KVM doesn't emulate features that access GPAs directly, e.g.
+ * Intel Processor Trace. Assume the GVA is always valid; when
+ * propagating faults from hardware, KVM will discard this info
+ * and use the EXIT_QUALIFICATION bits from the VMCS.
+ */
+ walker->fault.exit_qualification |= EPT_VIOLATION_GVA_IS_VALID;
+
/*
* Accesses to guest paging structures are either "reads" or
* "read+write" accesses, so consider them the latter if write_fault
@@ -523,6 +532,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
*/
if (access & PFERR_GUEST_PAGE_MASK)
walker->fault.exit_qualification |= EPT_VIOLATION_ACC_READ;
+ else
+ walker->fault.exit_qualification |= EPT_VIOLATION_GVA_TRANSLATED;

/*
* Note, pte_access holds the raw RWX bits from the EPTE, not

> }
>
> @@ -496,7 +504,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
> * [2:0] - Derive from the access bits. The exit_qualification might be
> * out of date if it is serving an EPT misconfiguration.
> * [5:3] - Calculated by the page walk of the guest EPT page tables
> - * [7:8] - Derived from [7:8] of real exit_qualification
> + * [7:8] - Set at the kvm_translate_gpa() call sites above
> *
> * The other bits are set to 0.
> */
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 937aeb474af7..39f8504f5cf2 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -443,11 +443,12 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
> vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
> exit_qualification = 0;
> } else {
> - exit_qualification = fault->exit_qualification;
> - exit_qualification |= vmx_get_exit_qual(vcpu) &
> - (EPT_VIOLATION_GVA_IS_VALID |
> - EPT_VIOLATION_GVA_TRANSLATED);
> vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
> + exit_qualification = fault->exit_qualification;
> + if (fault->hardware_nested_page_fault)
> + exit_qualification |= vmx_get_exit_qual(vcpu) &
> + (EPT_VIOLATION_GVA_IS_VALID |
> + EPT_VIOLATION_GVA_TRANSLATED);

Similar to the goof in NPT, effectively merging emulated and hardware information
is wrong. On top of the MBEC+GMET changes:

u64 mask = EPT_VIOLATION_GVA_IS_VALID |
EPT_VIOLATION_GVA_TRANSLATED;

if (vmx->nested.msrs.ept_caps & VMX_EPT_ADVANCED_VMEXIT_INFO_BIT)
mask |= EPT_VIOLATION_GVA_USER |
EPT_VIOLATION_GVA_WRITABLE |
EPT_VIOLATION_GVA_NX;

exit_qualification = fault->exit_qualification & ~mask;

/*
* Use the EXIT_QUALIFICATION from the VMCS if and only
* if the hardware VM-Exit from L2 was an EPT Violation.
* If the fault is synthesized, then EXIT_QUALIFICATION
* is stale and/or holds entirely different data. And
* conversely, KVM _must_ rely on EXIT_QUALIFICATION if
* the fault came from hardware, because KVM only sees
* and walks the faulting GPA.
*/
if (from_hardware)
exit_qualification |= vmx_get_exit_qual(vcpu) & mask;
else
exit_qualification |= fault->exit_qualification & mask;

vm_exit_reason = EXIT_REASON_EPT_VIOLATION;