Re: [syzbot] [mm?] [f2fs?] [exfat?] memory leak in __kfree_rcu_sheaf

From: Harry Yoo

Date: Tue Mar 17 2026 - 22:48:09 EST


#syz test

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index d79acf5c5100..b7be2cc1efc3 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -50,8 +50,8 @@
*
* The kmemleak_object structures have a use_count incremented or decremented
* using the get_object()/put_object() functions. When the use_count becomes
- * 0, this count can no longer be incremented and put_object() schedules the
- * kmemleak_object freeing via an RCU callback. All calls to the get_object()
+ * 0, this count can no longer be incremented and put_object() adds the
+ * kmemleak_object to a deferred free list. All calls to the get_object()
* function must be protected by rcu_read_lock() to avoid accessing a freed
* structure.
*/
@@ -93,6 +93,7 @@
#include <linux/mm.h>
#include <linux/workqueue.h>
#include <linux/crc32.h>
+#include <linux/llist.h>

#include <asm/sections.h>
#include <asm/processor.h>
@@ -138,7 +139,7 @@ struct kmemleak_object {
struct list_head object_list;
struct list_head gray_list;
struct rb_node rb_node;
- struct rcu_head rcu; /* object_list lockless traversal */
+ struct llist_node free_node; /* deferred freeing */
/* object usage count; object freed when use_count == 0 */
atomic_t use_count;
unsigned int del_state; /* deletion state */
@@ -209,6 +210,13 @@ static DEFINE_RAW_SPINLOCK(kmemleak_lock);
static struct kmem_cache *object_cache;
static struct kmem_cache *scan_area_cache;

+/* objects pending RCU-deferred freeing */
+static LLIST_HEAD(objects_to_free);
+static atomic_long_t objects_to_free_count;
+static void flush_deferred_frees_work(struct work_struct *work);
+static DECLARE_WORK(deferred_free_work, flush_deferred_frees_work);
+#define DEFERRED_FREE_BATCH 256
+
/* set if tracing memory operations is enabled */
static int kmemleak_enabled __read_mostly = 1;
/* same as above but only for the kmemleak_free() callback */
@@ -522,14 +530,12 @@ static void mem_pool_free(struct kmemleak_object *object)
}

/*
- * RCU callback to free a kmemleak_object.
+ * Free a kmemleak_object and its associated scan areas.
*/
-static void free_object_rcu(struct rcu_head *rcu)
+static void free_object(struct kmemleak_object *object)
{
struct hlist_node *tmp;
struct kmemleak_scan_area *area;
- struct kmemleak_object *object =
- container_of(rcu, struct kmemleak_object, rcu);

/*
* Once use_count is 0 (guaranteed by put_object), there is no other
@@ -543,11 +549,19 @@ static void free_object_rcu(struct rcu_head *rcu)
}

/*
- * Decrement the object use_count. Once the count is 0, free the object using
- * an RCU callback. Since put_object() may be called via the kmemleak_free() ->
- * delete_object() path, the delayed RCU freeing ensures that there is no
- * recursive call to the kernel allocator. Lock-less RCU object_list traversal
- * is also possible.
+ * Decrement the object use_count. Once the count is 0, add the object to the
+ * deferred free list. Since put_object() may be called via the
+ * kmemleak_free() -> delete_object() path, the deferred freeing ensures that
+ * there is no recursive call to the kernel allocator. Lock-less RCU
+ * object_list traversal is also possible. The actual freeing happens after
+ * an RCU grace period in flush_deferred_frees().
+ *
+ * Unlike the previous call_rcu()-based approach, this avoids embedding
+ * rcu_head in kmemleak_object. Objects from SLAB_NOLEAKTRACE caches (like
+ * kmemleak's own object_cache) are not tracked by kmemleak. When such
+ * objects were linked in the call_rcu callback chain via rcu_head->next,
+ * kmemleak could not scan through them, breaking the chain and causing
+ * false positive leak reports for objects queued after them.
*/
static void put_object(struct kmemleak_object *object)
{
@@ -558,14 +572,46 @@ static void put_object(struct kmemleak_object *object)
WARN_ON(object->flags & OBJECT_ALLOCATED);

/*
- * It may be too early for the RCU callbacks, however, there is no
+ * It may be too early for deferred freeing, however, there is no
* concurrent object_list traversal when !object_cache and all objects
* came from the memory pool. Free the object directly.
*/
- if (object_cache)
- call_rcu(&object->rcu, free_object_rcu);
- else
- free_object_rcu(&object->rcu);
+ if (object_cache) {
+ llist_add(&object->free_node, &objects_to_free);
+ if (atomic_long_inc_return(&objects_to_free_count) >=
+ DEFERRED_FREE_BATCH)
+ schedule_work(&deferred_free_work);
+ } else {
+ free_object(object);
+ }
+}
+
+/*
+ * Detach the deferred free list, wait for an RCU grace period, then free the
+ * detached objects. This must be called from a context that can block.
+ */
+static void flush_deferred_frees(void)
+{
+ struct llist_node *list;
+ struct kmemleak_object *object, *tmp;
+ long count = 0;
+
+ list = llist_del_all(&objects_to_free);
+ if (!list)
+ return;
+
+ synchronize_rcu();
+
+ llist_for_each_entry_safe(object, tmp, list, free_node) {
+ free_object(object);
+ count++;
+ }
+ atomic_long_sub(count, &objects_to_free_count);
+}
+
+static void flush_deferred_frees_work(struct work_struct *work)
+{
+ flush_deferred_frees();
}

/*
@@ -809,7 +855,7 @@ static void create_object_percpu(unsigned long ptr, size_t size,
}

/*
- * Mark the object as not allocated and schedule RCU freeing via put_object().
+ * Mark the object as not allocated and defer its freeing via put_object().
*/
static void __delete_object(struct kmemleak_object *object)
{
@@ -2209,6 +2255,7 @@ static void __kmemleak_do_cleanup(void)
if (!(++cnt & 0x3f))
cond_resched();
}
+ flush_deferred_frees();
}

/*

base-commit: fda995dadf2960405545e5002aaa85207aa758cf
--
2.43.0