Re: [PATCH 2/2] x86/virt/tdx: Use PFN directly for unmapping guest private memory

From: Yan Zhao

Date: Thu Mar 19 2026 - 03:26:39 EST


On Thu, Mar 19, 2026 at 11:20:48AM +0800, Xiaoyao Li wrote:
> On 3/19/2026 8:58 AM, Yan Zhao wrote:
> > From: Sean Christopherson <seanjc@xxxxxxxxxx>
> >
> > Remove the completely unnecessary assumptions that memory unmapped from a
> > TDX guest is backed by refcounted struct page memory.
> >
> > APIs tdh_phymem_page_wbinvd_hkid(), tdx_quirk_reset_page() are used when
> > unmapping guest private memory from S-EPT. Since mapping of guest private
> > memory places no requirements on how KVM and guest_memfd manage memory,
> > neither does guest private memory unmapping.
> >
> > Rip out the misguided struct page assumptions/constraints by having the two
> > APIs take PFN directly. This ensures that for future huge page support in
> > S-EPT, the kernel doesn't pick up even worse assumptions like "a hugepage
> > must be contained in a single folio".
> >
> > Use "kvm_pfn_t pfn" for type safety. Using this KVM type is appropriate
> > since APIs tdh_phymem_page_wbinvd_hkid() and tdx_quirk_reset_page() are
> > exported to KVM only.
> >
> > Update mk_keyed_paddr(), which is invoked by tdh_phymem_page_wbinvd_hkid(),
> > to take PFN as parameter accordingly. Opportunistically, move
> > mk_keyed_paddr() from tdx.h to tdx.c since there are no external users.
> >
> > Have tdx_reclaim_page() remain using struct page as parameter since it's
> > currently not used for removing guest private memory yet.
> >
> > [Yan: Use kvm_pfn_t, drop reclaim API param update, move mk_keyed_paddr()]
> >
> > Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
> > Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
> > ---
> > arch/x86/include/asm/tdx.h | 15 ++-------------
> > arch/x86/kvm/vmx/tdx.c | 10 +++++-----
> > arch/x86/virt/vmx/tdx/tdx.c | 16 +++++++++++-----
> > 3 files changed, 18 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> > index f3f0b1872176..6ceb4cd9ff21 100644
> > --- a/arch/x86/include/asm/tdx.h
> > +++ b/arch/x86/include/asm/tdx.h
> > @@ -153,7 +153,7 @@ int tdx_guest_keyid_alloc(void);
> > u32 tdx_get_nr_guest_keyids(void);
> > void tdx_guest_keyid_free(unsigned int keyid);
> > -void tdx_quirk_reset_page(struct page *page);
> > +void tdx_quirk_reset_page(kvm_pfn_t pfn);
> > struct tdx_td {
> > /* TD root structure: */
> > @@ -177,17 +177,6 @@ struct tdx_vp {
> > struct page **tdcx_pages;
> > };
> > -static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
> > -{
> > - u64 ret;
> > -
> > - ret = page_to_phys(page);
> > - /* KeyID bits are just above the physical address bits: */
> > - ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
> > -
> > - return ret;
> > -}
> > -
> > static inline int pg_level_to_tdx_sept_level(enum pg_level level)
> > {
> > WARN_ON_ONCE(level == PG_LEVEL_NONE);
> > @@ -219,7 +208,7 @@ u64 tdh_mem_track(struct tdx_td *tdr);
> > u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, u64 level, u64 *ext_err1, u64 *ext_err2);
> > u64 tdh_phymem_cache_wb(bool resume);
> > u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
> > -u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
> > +u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, kvm_pfn_t pfn);
> > #else
> > static inline void tdx_init(void) { }
> > static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 1f1abc5b5655..75ad3debcd84 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -343,7 +343,7 @@ static int tdx_reclaim_page(struct page *page)
> > r = __tdx_reclaim_page(page);
> > if (!r)
> > - tdx_quirk_reset_page(page);
> > + tdx_quirk_reset_page(page_to_pfn(page));
> > return r;
> > }
> > @@ -597,7 +597,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
> > if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
> > return;
> > - tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
> > + tdx_quirk_reset_page(page_to_pfn(kvm_tdx->td.tdr_page));
> > __free_page(kvm_tdx->td.tdr_page);
> > kvm_tdx->td.tdr_page = NULL;
> > @@ -1776,9 +1776,9 @@ static int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
> > static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> > enum pg_level level, u64 mirror_spte)
> > {
> > - struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
> > int tdx_level = pg_level_to_tdx_sept_level(level);
> > struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> > + kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
> > gpa_t gpa = gfn_to_gpa(gfn);
> > u64 err, entry, level_state;
> > @@ -1817,11 +1817,11 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> > if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
> > return;
> > - err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
> > + err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
> > if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
> > return;
> > - tdx_quirk_reset_page(page);
> > + tdx_quirk_reset_page(pfn);
> > }
> > void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
> > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> > index a9dd75190c67..2f9d07ad1a9a 100644
> > --- a/arch/x86/virt/vmx/tdx/tdx.c
> > +++ b/arch/x86/virt/vmx/tdx/tdx.c
> > @@ -730,9 +730,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
> > mb();
> > }
> > -void tdx_quirk_reset_page(struct page *page)
> > +void tdx_quirk_reset_page(kvm_pfn_t pfn)
>
> So why keep the function tdx_quirk_reset_page() but expect passing in the
> kvm_pfn_t? It looks werid that the name indicates to reset a page but what
> gets passed in is a pfn.
I thought about introducing tdx_quirk_reset_pfn(). But considering
tdx_quirk_reset_pfn() has to be an exported API, I'm reluctant to do that.

Given that even with tdx_quirk_reset_pfn(), it still expects TDX convertible
RAM, I think having tdx_quirk_reset_page() to take pfn is still acceptable.

We just don't want KVM to do pfn --> struct page --> pfn conversions.