Re: [PATCH mm-unstable v17 02/14] mm/khugepaged: generalize alloc_charge_folio()

From: Usama Arif

Date: Mon May 18 2026 - 08:02:25 EST

On Mon, 11 May 2026 12:58:02 -0600 Nico Pache <npache@xxxxxxxxxx> wrote:

> From: Dev Jain <dev.jain@xxxxxxx>
>
> Pass order to alloc_charge_folio() and update mTHP statistics.
>
> Reviewed-by: Wei Yang <richard.weiyang@xxxxxxxxx>
> Reviewed-by: Lance Yang <lance.yang@xxxxxxxxx>
> Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
> Reviewed-by: Lorenzo Stoakes <ljs@xxxxxxxxxx>
> Reviewed-by: Zi Yan <ziy@xxxxxxxxxx>
> Acked-by: Usama Arif <usama.arif@xxxxxxxxx>
> Acked-by: David Hildenbrand (Arm) <david@xxxxxxxxxx>
> Signed-off-by: Dev Jain <dev.jain@xxxxxxx>
> Co-developed-by: Nico Pache <npache@xxxxxxxxxx>
> Signed-off-by: Nico Pache <npache@xxxxxxxxxx>
> ---
> Documentation/admin-guide/mm/transhuge.rst | 8 ++++++++
> include/linux/huge_mm.h | 2 ++
> mm/huge_memory.c | 4 ++++
> mm/khugepaged.c | 17 +++++++++++------
> 4 files changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> index 5fbc3d89bb07..c51932e6275d 100644
> --- a/Documentation/admin-guide/mm/transhuge.rst
> +++ b/Documentation/admin-guide/mm/transhuge.rst
> @@ -639,6 +639,14 @@ anon_fault_fallback_charge
> instead falls back to using huge pages with lower orders or
> small pages even though the allocation was successful.
>
> +collapse_alloc
> + is incremented every time a huge page is successfully allocated for a
> + khugepaged collapse.
> +
> +collapse_alloc_failed
> + is incremented every time a huge page allocation fails during a
> + khugepaged collapse.
> +
> zswpout
> is incremented every time a huge page is swapped out to zswap in one
> piece without splitting.
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 2949e5acff35..ba7ae6808544 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -128,6 +128,8 @@ enum mthp_stat_item {
> MTHP_STAT_ANON_FAULT_ALLOC,
> MTHP_STAT_ANON_FAULT_FALLBACK,
> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> + MTHP_STAT_COLLAPSE_ALLOC,
> + MTHP_STAT_COLLAPSE_ALLOC_FAILED,
> MTHP_STAT_ZSWPOUT,
> MTHP_STAT_SWPIN,
> MTHP_STAT_SWPIN_FALLBACK,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index e9d499da0ac7..05f482a72a89 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -699,6 +699,8 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> +DEFINE_MTHP_STAT_ATTR(collapse_alloc, MTHP_STAT_COLLAPSE_ALLOC);
> +DEFINE_MTHP_STAT_ATTR(collapse_alloc_failed, MTHP_STAT_COLLAPSE_ALLOC_FAILED);
> DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> @@ -764,6 +766,8 @@ static struct attribute *any_stats_attrs[] = {
> #endif
> &split_attr.attr,
> &split_failed_attr.attr,
> + &collapse_alloc_attr.attr,
> + &collapse_alloc_failed_attr.attr,
> NULL,
> };
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 979885694351..f0e29d5c7b1f 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1068,21 +1068,26 @@ static enum scan_result __collapse_huge_page_swapin(struct mm_struct *mm,
> }
>
> static enum scan_result alloc_charge_folio(struct folio **foliop, struct mm_struct *mm,
> - struct collapse_control *cc)
> + struct collapse_control *cc, unsigned int order)
> {
> gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
> GFP_TRANSHUGE);
> int node = collapse_find_target_node(cc);
> struct folio *folio;
>
> - folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
> + folio = __folio_alloc(gfp, order, node, &cc->alloc_nmask);
> if (!folio) {
> *foliop = NULL;
> - count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> + if (is_pmd_order(order))
> + count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
> + count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC_FAILED);
> return SCAN_ALLOC_HUGE_PAGE_FAIL;
> }
>
> - count_vm_event(THP_COLLAPSE_ALLOC);
> + if (is_pmd_order(order))
> + count_vm_event(THP_COLLAPSE_ALLOC);
> + count_mthp_stat(order, MTHP_STAT_COLLAPSE_ALLOC);
> +

With this change, the vmstat THP_COLLAPSE_ALLOC counter is incremented for
PMD order only. But after this we still have

count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1);

which is not guarded by is_pmd_order().

I think we want this to be PMD order only as well, so that the vmstat
and cgroup counters keep the same meaning?

> if (unlikely(mem_cgroup_charge(folio, mm, gfp))) {
> folio_put(folio);
> *foliop = NULL;
> @@ -1118,7 +1123,7 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
> */
> mmap_read_unlock(mm);
>
> - result = alloc_charge_folio(&folio, mm, cc);
> + result = alloc_charge_folio(&folio, mm, cc, HPAGE_PMD_ORDER);
> if (result != SCAN_SUCCEED)
> goto out_nolock;
>
> @@ -1899,7 +1904,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
> VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
> VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
>
> - result = alloc_charge_folio(&new_folio, mm, cc);
> + result = alloc_charge_folio(&new_folio, mm, cc, HPAGE_PMD_ORDER);
> if (result != SCAN_SUCCEED)
> goto out;
>
> --
> 2.54.0
>
>