[PATCH 1/7 v2] mm/page_counter: introduce per-page_counter stock
From: Joshua Hahn
Date: Fri May 22 2026 - 18:13:48 EST
In order to avoid expensive hierarchy walks on every memcg charge and
limit check, memcontrol uses per-cpu stocks (memcg_stock_pcp) to cache
pre-charged pages and introduce a fast path to try_charge_memcg.
However, there are a few quirks with the current implementation that
could be improved upon.
First, each memcg_stock_pcp can only cache the charges of 7 memcgs
(defined as NR_MEMCG_STOCK), which means that once a CPU starts handling
the charging of more than 7 memcgs, it randomly selects a victim memcg
to evict and drain from the cpu, which can cause unnecessarily increased
latencies and thrashing as memcgs continually evict each others' stock.
Second, stock is tightly coupled with memcg, which means that all page
counters in a memcg share the same resource. This may simplify some of
the charging logic, but it prevents new page counters from being added
and using a separate stock.
We can address these concerns by pushing the concept of stock down to
the page_counter level, which addresses the random eviction problem by
getting rid of the 7 slot limit, and makes enabling separate stock
caches for other page_counters simpler.
Introduce a generic per-cpu stock directly in struct page_counter.
Stock can optionally be enabled per-page_counter, limiting the overhead
increase for page_counters who do not benefit greatly from caching
charges.
This patch introduces the page_counter_stock struct and its
enable/disable/free functions, but does not use these yet.
Suggested-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Joshua Hahn <joshua.hahnjy@xxxxxxxxx>
---
include/linux/page_counter.h | 13 ++++++++
mm/page_counter.c | 64 ++++++++++++++++++++++++++++++++++++
2 files changed, 77 insertions(+)
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index d649b6bbbc871..c7e3ab3356d20 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -5,8 +5,15 @@
#include <linux/atomic.h>
#include <linux/cache.h>
#include <linux/limits.h>
+#include <linux/local_lock.h>
+#include <linux/percpu.h>
#include <asm/page.h>
+struct page_counter_stock {
+ local_trylock_t lock;
+ unsigned long nr_pages;
+};
+
struct page_counter {
/*
* Make sure 'usage' does not share cacheline with any other field in
@@ -41,6 +48,8 @@ struct page_counter {
unsigned long high;
unsigned long max;
struct page_counter *parent;
+ struct page_counter_stock __percpu *stock;
+ unsigned int batch;
} ____cacheline_internodealigned_in_smp;
#if BITS_PER_LONG == 32
@@ -99,6 +108,10 @@ static inline void page_counter_reset_watermark(struct page_counter *counter)
counter->watermark = usage;
}
+int page_counter_enable_stock(struct page_counter *counter, unsigned int batch);
+void page_counter_disable_stock(struct page_counter *counter);
+void page_counter_free_stock(struct page_counter *counter);
+
#if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM)
void page_counter_calculate_protection(struct page_counter *root,
struct page_counter *counter,
diff --git a/mm/page_counter.c b/mm/page_counter.c
index 661e0f2a5127a..a1a871a9d5c49 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -8,6 +8,7 @@
#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
+#include <linux/percpu.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
@@ -289,6 +290,69 @@ int page_counter_memparse(const char *buf, const char *max,
return 0;
}
+int page_counter_enable_stock(struct page_counter *counter, unsigned int batch)
+{
+ struct page_counter_stock __percpu *stock;
+ int cpu;
+
+ stock = alloc_percpu(struct page_counter_stock);
+ if (!stock)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct page_counter_stock *s = per_cpu_ptr(stock, cpu);
+
+ local_trylock_init(&s->lock);
+ }
+ counter->stock = stock;
+ counter->batch = batch;
+
+ return 0;
+}
+
+static void page_counter_drain_stock_nolock(struct page_counter *counter)
+{
+ unsigned long stock_to_drain = 0;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct page_counter_stock *stock;
+
+ stock = per_cpu_ptr(counter->stock, cpu);
+ stock_to_drain += stock->nr_pages;
+ stock->nr_pages = 0;
+ }
+
+ if (stock_to_drain)
+ page_counter_uncharge(counter, stock_to_drain);
+}
+
+void page_counter_disable_stock(struct page_counter *counter)
+{
+ if (!counter->stock)
+ return;
+
+ /* This prevents future charges from trying to deposit pages */
+ WRITE_ONCE(counter->batch, 0);
+
+ /*
+ * Charges can still be in-flight at this time. Instead of locking here,
+ * do the majority of the drains here without locking to free up pages
+ * now. Any remaining stock will be drained in page_counter_free_stock.
+ */
+ page_counter_drain_stock_nolock(counter);
+}
+
+void page_counter_free_stock(struct page_counter *counter)
+{
+ if (!counter->stock)
+ return;
+
+ page_counter_drain_stock_nolock(counter);
+ free_percpu(counter->stock);
+ counter->stock = NULL;
+}
+
#if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM)
/*
--
2.53.0-Meta