[v2 12/16] mm: handle PMD swap entries in MADV_WILLNEED

From: Usama Arif

Date: Tue Jun 02 2026 - 11:07:44 EST


swapin_walk_pmd_entry() only walks PTEs and skips non-present PMDs, so
MADV_WILLNEED is currently a no-op on a PMD swap entry. Handle that
case by reading the whole 2 MB folio in at PMD order via
swapin_sync(BIT(HPAGE_PMD_ORDER)), so that the subsequent fault hits
do_huge_pmd_swap_page() and restores the THP mapping; an order-0
read-ahead would instead force the fault to split.

Signed-off-by: Usama Arif <usama.arif@xxxxxxxxx>
---
mm/madvise.c | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 00539022f804..25f40542b951 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -193,6 +193,46 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
spinlock_t *ptl;
unsigned long addr;

+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
+ pmd_t pmdval = *pmd;
+
+ if (pmd_is_swap_entry(pmdval)) {
+ softleaf_t entry = softleaf_from_pmd(pmdval);
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = start,
+ .real_address = start,
+ .pmd = pmd,
+ };
+ struct swap_info_struct *si;
+ struct folio *folio;
+
+ /*
+ * Pin the swap device under the PMD lock so the
+ * lookup is atomic with the PMD-swap-entry
+ * observation; swapin_sync() requires its caller to
+ * keep the device valid for the duration of the call.
+ */
+ si = get_swap_device(entry);
+ spin_unlock(ptl);
+ if (!si) {
+ cond_resched();
+ return 0;
+ }
+
+ folio = swapin_sync(entry, GFP_HIGHUSER_MOVABLE,
+ BIT(HPAGE_PMD_ORDER), &vmf,
+ NULL, 0);
+ if (!IS_ERR_OR_NULL(folio))
+ folio_put(folio);
+ put_swap_device(si);
+ cond_resched();
+ return 0;
+ }
+ spin_unlock(ptl);
+ }
+
for (addr = start; addr < end; addr += PAGE_SIZE) {
pte_t pte;
softleaf_t entry;
--
2.52.0