[PATCH mm-unstable RFC v4 5/7] mm/huge_memory: refactor __split_huge_pmd_locked()

From: Yin Tirui

Date: Tue May 26 2026 - 11:26:48 EST


Rework __split_huge_pmd_locked() to classify huge PMDs by the PMD entry
itself instead of starting from vma_is_anonymous().

Present PMDs are classified with vm_normal_folio_pmd(): file/shmem THPs
are dropped and refaulted later, anonymous THPs are split into PTEs, and
PMDs without a normal folio are handled as huge zero or special PMDs.

Non-present PMDs are classified with pmd_to_softleaf_folio(): file/shmem
migration entries are dropped, while anonymous migration/device-private
entries are split into PTEs.

This also makes the anonymous-versus-file decision folio-based rather than
VMA-based: a private file mapping that has CoW'ed to an anonymous THP
follows the anonymous path even though the VMA is file-backed.

No intended behavioural change.

Signed-off-by: Yin Tirui <yintirui@xxxxxxxxxx>
---
mm/huge_memory.c | 197 +++++++++++++++++++++++++++--------------------
1 file changed, 114 insertions(+), 83 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3964258ff91d..8cd77389d52f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3136,25 +3136,38 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,

count_vm_event(THP_SPLIT_PMD);

- if (!vma_is_anonymous(vma)) {
- old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
- /*
- * We are going to unmap this huge page. So
- * just go ahead and zap it
- */
- if (arch_needs_pgtable_deposit())
- zap_deposited_table(mm, pmd);
- if (vma_is_special_huge(vma))
- return;
- if (unlikely(pmd_is_migration_entry(old_pmd))) {
- const softleaf_t old_entry = softleaf_from_pmd(old_pmd);
+ if (pmd_present(*pmd)) {
+ folio = vm_normal_folio_pmd(vma, haddr, *pmd);
+
+ if (unlikely(!folio)) {
+ if (is_huge_zero_pmd(*pmd)) {
+ /*
+ * FIXME: Do we want to invalidate secondary mmu by calling
+ * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
+ * inside __split_huge_pmd() ?
+ *
+ * We are going from a zero huge page write protected to zero
+ * small page also write protected so it does not seem useful
+ * to invalidate secondary mmu at this time.
+ */
+ return __split_huge_zero_page_pmd(vma, haddr, pmd);
+ }

- folio = softleaf_to_folio(old_entry);
- } else if (is_huge_zero_pmd(old_pmd)) {
+ /* Present but not a normal folio: drop the PMD. */
+ old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+ if (arch_needs_pgtable_deposit())
+ zap_deposited_table(mm, pmd);
return;
- } else {
+ }
+
+ if (unlikely(!folio_test_anon(folio))) {
+ old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+ if (arch_needs_pgtable_deposit())
+ zap_deposited_table(mm, pmd);
+ if (vma_is_special_huge(vma))
+ return;
+
page = pmd_page(old_pmd);
- folio = page_folio(page);
if (!folio_test_dirty(folio) && pmd_dirty(old_pmd))
folio_mark_dirty(folio);
if (!folio_test_referenced(folio) && pmd_young(old_pmd))
@@ -3164,72 +3177,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
folio_put(folio);
return;
}
- add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
- return;
- }
-
- if (is_huge_zero_pmd(*pmd)) {
- /*
- * FIXME: Do we want to invalidate secondary mmu by calling
- * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
- * inside __split_huge_pmd() ?
- *
- * We are going from a zero huge page write protected to zero
- * small page also write protected so it does not seems useful
- * to invalidate secondary mmu at this time.
- */
- return __split_huge_zero_page_pmd(vma, haddr, pmd);
- }
-
- if (pmd_is_migration_entry(*pmd)) {
- softleaf_t entry;
-
- old_pmd = *pmd;
- entry = softleaf_from_pmd(old_pmd);
- page = softleaf_to_page(entry);
- folio = page_folio(page);
-
- soft_dirty = pmd_swp_soft_dirty(old_pmd);
- uffd_wp = pmd_swp_uffd_wp(old_pmd);
-
- write = softleaf_is_migration_write(entry);
- if (PageAnon(page))
- anon_exclusive = softleaf_is_migration_read_exclusive(entry);
- young = softleaf_is_migration_young(entry);
- dirty = softleaf_is_migration_dirty(entry);
- } else if (pmd_is_device_private_entry(*pmd)) {
- softleaf_t entry;
-
- old_pmd = *pmd;
- entry = softleaf_from_pmd(old_pmd);
- page = softleaf_to_page(entry);
- folio = page_folio(page);
-
- soft_dirty = pmd_swp_soft_dirty(old_pmd);
- uffd_wp = pmd_swp_uffd_wp(old_pmd);
-
- write = softleaf_is_device_private_write(entry);
- anon_exclusive = PageAnonExclusive(page);
-
- /*
- * Device private THP should be treated the same as regular
- * folios w.r.t anon exclusive handling. See the comments for
- * folio handling and anon_exclusive below.
- */
- if (freeze && anon_exclusive &&
- folio_try_share_anon_rmap_pmd(folio, page))
- freeze = false;
- if (!freeze) {
- rmap_t rmap_flags = RMAP_NONE;
-
- folio_ref_add(folio, HPAGE_PMD_NR - 1);
- if (anon_exclusive)
- rmap_flags |= RMAP_EXCLUSIVE;

- folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
- vma, haddr, rmap_flags);
- }
- } else {
/*
* Up to this point the pmd is present and huge and userland has
* the whole access to the hugepage during the split (which
@@ -3255,7 +3203,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
*/
old_pmd = pmdp_invalidate(vma, haddr, pmd);
page = pmd_page(old_pmd);
- folio = page_folio(page);
if (pmd_dirty(old_pmd)) {
dirty = true;
folio_set_dirty(folio);
@@ -3266,7 +3213,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
uffd_wp = pmd_uffd_wp(old_pmd);

VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
- VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

/*
* Without "freeze", we'll simply split the PMD, propagating the
@@ -3296,6 +3242,85 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
vma, haddr, rmap_flags);
}
+ } else {
+ /*
+ * Non-present PMD: a softleaf-encoded migration or
+ * device-private entry. pmd_to_softleaf_folio() warns and
+ * returns NULL for any other encoding.
+ */
+ folio = pmd_to_softleaf_folio(*pmd);
+ if (unlikely(!folio))
+ return;
+
+ if (unlikely(!folio_test_anon(folio))) {
+ /*
+ * File/shmem migration entry: drop the PMD without
+ * splitting. Unlike the present case the entry holds
+ * neither a folio reference nor an rmap to release,
+ * so just adjust the RSS counter.
+ */
+ pmdp_huge_clear_flush(vma, haddr, pmd);
+ if (arch_needs_pgtable_deposit())
+ zap_deposited_table(mm, pmd);
+ if (unlikely(vma_is_special_huge(vma))) {
+ VM_WARN_ONCE(1,
+ "unexpected special huge PMD migration entry\n");
+ return;
+ }
+ add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
+ return;
+ }
+
+ if (pmd_is_migration_entry(*pmd)) {
+ softleaf_t entry;
+
+ old_pmd = *pmd;
+ entry = softleaf_from_pmd(old_pmd);
+ page = softleaf_to_page(entry);
+
+ soft_dirty = pmd_swp_soft_dirty(old_pmd);
+ uffd_wp = pmd_swp_uffd_wp(old_pmd);
+
+ write = softleaf_is_migration_write(entry);
+ if (PageAnon(page))
+ anon_exclusive = softleaf_is_migration_read_exclusive(entry);
+ young = softleaf_is_migration_young(entry);
+ dirty = softleaf_is_migration_dirty(entry);
+ } else if (pmd_is_device_private_entry(*pmd)) {
+ softleaf_t entry;
+
+ old_pmd = *pmd;
+ entry = softleaf_from_pmd(old_pmd);
+ page = softleaf_to_page(entry);
+
+ soft_dirty = pmd_swp_soft_dirty(old_pmd);
+ uffd_wp = pmd_swp_uffd_wp(old_pmd);
+
+ write = softleaf_is_device_private_write(entry);
+ anon_exclusive = PageAnonExclusive(page);
+
+ /*
+ * Device-private THP should be treated the same as
+ * regular folios w.r.t. anon-exclusive handling. See
+ * the matching code for present anon folios above.
+ */
+ if (freeze && anon_exclusive &&
+ folio_try_share_anon_rmap_pmd(folio, page))
+ freeze = false;
+ if (!freeze) {
+ rmap_t rmap_flags = RMAP_NONE;
+
+ folio_ref_add(folio, HPAGE_PMD_NR - 1);
+ if (anon_exclusive)
+ rmap_flags |= RMAP_EXCLUSIVE;
+
+ folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
+ vma, haddr, rmap_flags);
+ }
+ } else {
+ VM_WARN_ON_ONCE(1);
+ return;
+ }
}

/*
--
2.43.0