Re: [PATCH] mm/alloc_tag: clear codetag for pages allocated before page_ext initialization
From: Hao Ge
Date: Thu Mar 19 2026 - 21:59:58 EST
On 2026/3/20 07:48, Suren Baghdasaryan wrote:
On Thu, Mar 19, 2026 at 4:44 PM Suren Baghdasaryan <surenb@xxxxxxxxxx> wrote:
On Thu, Mar 19, 2026 at 3:28 PM Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> wrote:
Forgot to ask. The example you are using here is for page_ext
On Thu, 19 Mar 2026 16:31:53 +0800 Hao Ge <hao.ge@xxxxxxxxx> wrote:
Hi Hao,
Due to initialization ordering, page_ext is allocated and initialized
relatively late during boot. Some pages have already been allocated
and freed before page_ext becomes available, leaving their codetag
uninitialized.
Thanks for the report.
Hmm. So, we are allocating pages before page_ext is initialized...
A clear example is in init_section_page_ext(): alloc_page_ext() calls
kmemleak_alloc().
allocation itself. Do you have any other examples where page
allocation happens before page_ext initialization? If that's the only
place, then we might be able to fix this in a simpler way by doing
something special for alloc_page_ext().
Hi Suren
To help illustrate the point, here's the debug log I added:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2d4b6f1a554e..ebfe636f5b07 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1293,6 +1293,9 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
update_page_tag_ref(handle, &ref);
put_page_tag_ref(handle);
+ } else {
+ pr_warn("__pgalloc_tag_add: get_page_tag_ref failed! page=%p pfn=%lu nr=%u\n", page, page_to_pfn(page), nr);
+ dump_stack();
}
}
And I caught the following logs:
[ 0.296399] __pgalloc_tag_add: get_page_tag_ref failed! page=ffffea000400c700 pfn=1049372 nr=1
[ 0.296400] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 7.0.0-rc4-dirty #12 PREEMPT(lazy)
[ 0.296402] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 0.296402] Call Trace:
[ 0.296403] <TASK>
[ 0.296403] dump_stack_lvl+0x53/0x70
[ 0.296405] __pgalloc_tag_add+0x3a3/0x6e0
[ 0.296406] ? __pfx___pgalloc_tag_add+0x10/0x10
[ 0.296407] ? kasan_unpoison+0x27/0x60
[ 0.296409] ? __kasan_unpoison_pages+0x2c/0x40
[ 0.296411] get_page_from_freelist+0xa54/0x1310
[ 0.296413] __alloc_frozen_pages_noprof+0x206/0x4c0
[ 0.296415] ? __pfx___alloc_frozen_pages_noprof+0x10/0x10
[ 0.296417] ? stack_depot_save_flags+0x3f/0x680
[ 0.296418] ? ___slab_alloc+0x518/0x530
[ 0.296420] alloc_pages_mpol+0x13a/0x3f0
[ 0.296421] ? __pfx_alloc_pages_mpol+0x10/0x10
[ 0.296423] ? _raw_spin_lock_irqsave+0x8a/0xf0
[ 0.296424] ? __pfx__raw_spin_lock_irqsave+0x10/0x10
[ 0.296426] alloc_slab_page+0xc2/0x130
[ 0.296427] allocate_slab+0x77/0x2c0
[ 0.296429] ? syscall_enter_define_fields+0x3bb/0x5f0
[ 0.296430] ___slab_alloc+0x125/0x530
[ 0.296432] ? __trace_define_field+0x252/0x3d0
[ 0.296433] __kmalloc_noprof+0x329/0x630
[ 0.296435] ? syscall_enter_define_fields+0x3bb/0x5f0
[ 0.296436] syscall_enter_define_fields+0x3bb/0x5f0
[ 0.296438] ? __pfx_syscall_enter_define_fields+0x10/0x10
[ 0.296440] event_define_fields+0x326/0x540
[ 0.296441] __trace_early_add_events+0xac/0x3c0
[ 0.296443] trace_event_init+0x24c/0x460
[ 0.296445] trace_init+0x9/0x20
[ 0.296446] start_kernel+0x199/0x3c0
[ 0.296448] x86_64_start_reservations+0x18/0x30
[ 0.296449] x86_64_start_kernel+0xe2/0xf0
[ 0.296451] common_startup_64+0x13e/0x141
[ 0.296453] </TASK>
[ 0.312234] __pgalloc_tag_add: get_page_tag_ref failed! page=ffffea000400f900 pfn=1049572 nr=1
[ 0.312234] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 7.0.0-rc4-dirty #12 PREEMPT(lazy)
[ 0.312236] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 0.312236] Call Trace:
[ 0.312237] <TASK>
[ 0.312237] dump_stack_lvl+0x53/0x70
[ 0.312239] __pgalloc_tag_add+0x3a3/0x6e0
[ 0.312240] ? __pfx___pgalloc_tag_add+0x10/0x10
[ 0.312241] ? rmqueue.constprop.0+0x4fc/0x1ce0
[ 0.312243] ? kasan_unpoison+0x27/0x60
[ 0.312244] ? __kasan_unpoison_pages+0x2c/0x40
[ 0.312246] get_page_from_freelist+0xa54/0x1310
[ 0.312248] __alloc_frozen_pages_noprof+0x206/0x4c0
[ 0.312250] ? __pfx___alloc_frozen_pages_noprof+0x10/0x10
[ 0.312253] alloc_slab_page+0x39/0x130
[ 0.312254] allocate_slab+0x77/0x2c0
[ 0.312255] ? alloc_cpumask_var_node+0xc7/0x230
[ 0.312257] ___slab_alloc+0x46d/0x530
[ 0.312259] __kmalloc_node_noprof+0x2fa/0x680
[ 0.312261] ? alloc_cpumask_var_node+0xc7/0x230
[ 0.312263] alloc_cpumask_var_node+0xc7/0x230
[ 0.312264] init_desc+0x141/0x6b0
[ 0.312266] alloc_desc+0x108/0x1b0
[ 0.312267] early_irq_init+0xee/0x1c0
[ 0.312268] ? __pfx_early_irq_init+0x10/0x10
[ 0.312271] start_kernel+0x1ab/0x3c0
[ 0.312272] x86_64_start_reservations+0x18/0x30
[ 0.312274] x86_64_start_kernel+0xe2/0xf0
[ 0.312275] common_startup_64+0x13e/0x141
[ 0.312277] </TASK>
[ 0.312834] __pgalloc_tag_add: get_page_tag_ref failed! page=ffffea000400fc00 pfn=1049584 nr=1
[ 0.312835] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 7.0.0-rc4-dirty #12 PREEMPT(lazy)
[ 0.312836] Hardware name: Red Hat KVM, BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 0.312837] Call Trace:
[ 0.312837] <TASK>
[ 0.312838] dump_stack_lvl+0x53/0x70
[ 0.312840] __pgalloc_tag_add+0x3a3/0x6e0
[ 0.312841] ? __pfx___pgalloc_tag_add+0x10/0x10
[ 0.312842] ? rmqueue.constprop.0+0x4fc/0x1ce0
[ 0.312844] ? kasan_unpoison+0x27/0x60
[ 0.312845] ? __kasan_unpoison_pages+0x2c/0x40
[ 0.312847] get_page_from_freelist+0xa54/0x1310
[ 0.312849] __alloc_frozen_pages_noprof+0x206/0x4c0
[ 0.312851] ? __pfx___alloc_frozen_pages_noprof+0x10/0x10
[ 0.312853] alloc_pages_mpol+0x13a/0x3f0
[ 0.312855] ? __pfx_alloc_pages_mpol+0x10/0x10
[ 0.312856] ? xas_find+0x2d8/0x450
[ 0.312858] ? _raw_spin_lock+0x84/0xe0
[ 0.312859] ? __pfx__raw_spin_lock+0x10/0x10
[ 0.312861] alloc_pages_noprof+0xf6/0x2b0
[ 0.312862] __change_page_attr+0x293/0x850
[ 0.312864] ? __pfx___change_page_attr+0x10/0x10
[ 0.312865] ? _vm_unmap_aliases+0x2d0/0x650
[ 0.312868] ? __pfx__vm_unmap_aliases+0x10/0x10
[ 0.312869] __change_page_attr_set_clr+0x16c/0x360
[ 0.312871] ? spp_getpage+0xbb/0x1e0
[ 0.312872] change_page_attr_set_clr+0x220/0x3c0
[ 0.312873] ? flush_tlb_one_kernel+0xf/0x30
[ 0.312875] ? set_pte_vaddr_p4d+0x110/0x180
[ 0.312877] ? __pfx_change_page_attr_set_clr+0x10/0x10
[ 0.312878] ? __pfx_set_pte_vaddr_p4d+0x10/0x10
[ 0.312881] ? __pfx_mtree_load+0x10/0x10
[ 0.312883] ? __pfx_mtree_load+0x10/0x10
[ 0.312884] ? __asan_memcpy+0x3c/0x60
[ 0.312886] ? set_intr_gate+0x10c/0x150
[ 0.312888] set_memory_ro+0x76/0xa0
[ 0.312889] ? __pfx_set_memory_ro+0x10/0x10
[ 0.312891] idt_setup_apic_and_irq_gates+0x2c1/0x390
and more.
Off topic - if we were to handle only alloc_page_ext() specifically, what would be the most straightforward
solution in your mind? I'd really appreciate your insight.
Thanks.
This branch can be marked as "unlikely".
If the slab cache has no free objects, it falls back
It's unfortunate that this isn't __initdata.
to the buddy allocator to allocate memory. However, at this point page_ext
is not yet fully initialized, so these newly allocated pages have no
codetag set. These pages may later be reclaimed by KASAN, which causes
the warning to trigger when they are freed because their codetag ref is
still empty.
Use a global array to track pages allocated before page_ext is fully
initialized, similar to how kmemleak tracks early allocations.
When page_ext initialization completes, set their codetag
to empty to avoid warnings when they are freed later.
...
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -74,6 +74,9 @@ static inline void set_codetag_empty(union codetag_ref *ref)
#ifdef CONFIG_MEM_ALLOC_PROFILING
+bool mem_profiling_is_available(void);
+void alloc_tag_add_early_pfn(unsigned long pfn);
+
#define ALLOC_TAG_SECTION_NAME "alloc_tags"
struct codetag_bytes {
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 58991ab09d84..a5bf4e72c154 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -6,6 +6,7 @@
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/page_ext.h>
+#include <linux/pgalloc_tag.h>
#include <linux/proc_fs.h>
#include <linux/seq_buf.h>
#include <linux/seq_file.h>
@@ -26,6 +27,82 @@ static bool mem_profiling_support;
static struct codetag_type *alloc_tag_cttype;
+/*
+ * State of the alloc_tag
+ *
+ * This is used to describe the states of the alloc_tag during bootup.
+ *
+ * When we need to allocate page_ext to store codetag, we face an
+ * initialization timing problem:
+ *
+ * Due to initialization order, pages may be allocated via buddy system
+ * before page_ext is fully allocated and initialized. Although these
+ * pages call the allocation hooks, the codetag will not be set because
+ * page_ext is not yet available.
+ *
+ * When these pages are later freed to the buddy system, it triggers
+ * warnings because their codetag is actually empty if
+ * CONFIG_MEM_ALLOC_PROFILING_DEBUG is enabled.
+ *
+ * Additionally, in this situation, we cannot record detailed allocation
+ * information for these pages.
+ */
+enum mem_profiling_state {
+ DOWN, /* No mem_profiling functionality yet */
+ UP /* Everything is working */
+};
+
+static enum mem_profiling_state mem_profiling_state = DOWN;
+
+bool mem_profiling_is_available(void)
+{
+ return mem_profiling_state == UP;
+}
+
+#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+
+#define EARLY_ALLOC_PFN_MAX 256
+
+static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX];
+static unsigned int early_pfn_count;
+static DEFINE_SPINLOCK(early_pfn_lock);
+
...
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1293,6 +1293,13 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
update_page_tag_ref(handle, &ref);
put_page_tag_ref(handle);
+ } else {
We can have a pointer to a function that is initialized to point to
+ /*
All because of this, I believe. Is this fixable?
+ * page_ext is not available yet, record the pfn so we can
+ * clear the tag ref later when page_ext is initialized.
+ */
+ if (!mem_profiling_is_available())
+ alloc_tag_add_early_pfn(page_to_pfn(page));
}
}
If we take that `else', we know we're running in __init code, yes? I
don't see how `__init pgalloc_tag_add_early()' could be made to work.
hrm. Something clever, please.
alloc_tag_add_early_pfn, which is defined as __init and uses
early_pfns which now can be defined as __initdata. After
clear_early_alloc_pfn_tag_refs() is done we reset that pointer to
NULL. __pgalloc_tag_add() instead of calling alloc_tag_add_early_pfn()
directly checks that pointer and if it's not NULL then calls the
function that it points to. This way __pgalloc_tag_add() which is not
an __init function will be invoking alloc_tag_add_early_pfn() __init
function only until we are done with initialization. I haven't tried
this but I think that should work. This also eliminates the need for
mem_profiling_state variable since we can use this function pointer
instead.