Re: [PATCH 03/28] KVM: x86/mmu: free up bit 10 of PTEs in preparation for MBEC

From: mlevitsk

Date: Tue Jun 02 2026 - 10:29:05 EST


On Tue, 2026-05-05 at 21:52 +0200, Paolo Bonzini wrote:
> From: Jon Kohler <jon@xxxxxxxxxxx>
>
> Update SPTE_MMIO_ALLOWED_MASK so that the EPT user-executable bit (bit 10)
> is treated like the EPT RWX bits (bits 2:0): when mode-based execute
> control is enabled, bit 10 can act like a "present" bit.  Likewise, do not
> include it in FROZEN_SPTE.
>
> No functional changes intended, other than the reduction of the maximum
> MMIO generation that is stored in page tables.
>
> Cc: Kai Huang <kai.huang@xxxxxxxxx>
> Signed-off-by: Jon Kohler <jon@xxxxxxxxxxx>
> Message-ID: <20251223054806.1611168-4-jon@xxxxxxxxxxx>
> Reviewed-by: Kai Huang <kai.huang@xxxxxxxxx>
> Tested-by: David Riley <d.riley@xxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/vmx.h |  2 ++
>  arch/x86/kvm/mmu/spte.h    | 20 +++++++++++---------
>  2 files changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index b2291a766e3f..2b30b921b375 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -560,10 +560,12 @@ enum vmcs_field {
>  #define VMX_EPT_ACCESS_BIT (1ull << 8)
>  #define VMX_EPT_DIRTY_BIT (1ull << 9)
>  #define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
> +
>  #define VMX_EPT_RWX_MASK                        (VMX_EPT_READABLE_MASK |       \
>   VMX_EPT_WRITABLE_MASK |       \
>   VMX_EPT_EXECUTABLE_MASK)
>  #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
> +#define VMX_EPT_USER_EXECUTABLE_MASK (1ull << 10)
>  
>  static inline u8 vmx_eptp_page_walk_level(u64 eptp)
>  {
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 28086fa86fe0..4283cea3e66c 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -96,11 +96,11 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
>  #undef SHADOW_ACC_TRACK_SAVED_MASK
>  
>  /*
> - * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
> + * Due to limited space in PTEs, the MMIO generation is an 18 bit subset of
>   * the memslots generation and is derived as follows:
>   *
> - * Bits 0-7 of the MMIO generation are propagated to spte bits 3-10
> - * Bits 8-18 of the MMIO generation are propagated to spte bits 52-62
> + * Bits 0-6 of the MMIO generation are propagated to spte bits 3-9
> + * Bits 7-17 of the MMIO generation are propagated to spte bits 52-62
>   *
>   * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
>   * the MMIO generation number, as doing so would require stealing a bit from
> @@ -111,7 +111,7 @@ static_assert(!(EPT_SPTE_MMU_WRITABLE & SHADOW_ACC_TRACK_SAVED_MASK));
>   */
>  
>  #define MMIO_SPTE_GEN_LOW_START 3
> -#define MMIO_SPTE_GEN_LOW_END 10
> +#define MMIO_SPTE_GEN_LOW_END 9
>  
>  #define MMIO_SPTE_GEN_HIGH_START 52
>  #define MMIO_SPTE_GEN_HIGH_END 62
> @@ -133,7 +133,8 @@ static_assert(!(SPTE_MMU_PRESENT_MASK &
>   * and so they're off-limits for generation; additional checks ensure the mask
>   * doesn't overlap legal PA bits), and bit 63 (carved out for future usage).
>   */
> -#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | GENMASK_ULL(2, 0))
> +#define SPTE_MMIO_ALLOWED_MASK (BIT_ULL(63) | GENMASK_ULL(51, 12) | \
> + BIT_ULL(10) | GENMASK_ULL(2, 0))
>  static_assert(!(SPTE_MMIO_ALLOWED_MASK &
>   (SPTE_MMU_PRESENT_MASK | MMIO_SPTE_GEN_LOW_MASK | MMIO_SPTE_GEN_HIGH_MASK)));
>  
> @@ -141,7 +142,7 @@ static_assert(!(SPTE_MMIO_ALLOWED_MASK &
>  #define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
>  
>  /* remember to adjust the comment above as well if you change these */
> -static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11);
> +static_assert(MMIO_SPTE_GEN_LOW_BITS == 7 && MMIO_SPTE_GEN_HIGH_BITS == 11);
>  
>  #define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
>  #define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
> @@ -217,10 +218,11 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
>   *
>   * Only used by the TDP MMU.
>   */
> -#define FROZEN_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL)
> +#define FROZEN_SPTE (SHADOW_NONPRESENT_VALUE | 0x1a0ULL)
>  
> -/* Frozen SPTEs must not be misconstrued as shadow present PTEs. */
> -static_assert(!(FROZEN_SPTE & SPTE_MMU_PRESENT_MASK));
> +/* Frozen SPTEs must not be misconstrued as shadow or MMU present PTEs. */
> +static_assert(!(FROZEN_SPTE & (SPTE_MMU_PRESENT_MASK |
> +        VMX_EPT_RWX_MASK | VMX_EPT_USER_EXECUTABLE_MASK)));
>  
>  static inline bool is_frozen_spte(u64 spte)
>  {

Note to self for the future: document the effective format of NPT and EPT SPTEs,
including the bits used by hardware, the ignored bits repurposed by KVM,
and the format of MMIO SPTEs.


I already have a draft of this, and I can prepare a patch that adds it
under Documentation/.

Reviewed-by: Maxim Levitsky <mlevitsk@xxxxxxxxxx>
Best regards,
Maxim Levitsky