[PATCH v4 1/3] mm/swap: colocate page-cluster sysctl with swap readahead
From: Jianyue Wu
Date: Wed Jun 03 2026 - 09:20:45 EST
page_cluster and the vm.page-cluster sysctl are only used by swap-in
readahead in swap_state.c. Move them out of swap.c into swap_state.c
together with swap_setup(), and make page_cluster static to that file.
Rename swap_setup() to swap_readahead_setup() while moving it. The helper
is internal to MM and now only sets up swap readahead defaults and its
sysctl hook, so the more specific name matches its reduced scope.
swap_setup() previously lived in mm/swap.c, which is built
unconditionally, so the vm.page-cluster sysctl was also registered on
CONFIG_SWAP=n kernels. swap_readahead_setup() is now a no-op stub when
CONFIG_SWAP is disabled, so vm.page-cluster is no longer registered
there. The knob only tunes swap-in readahead and had no effect without
swap.
Suggested-by: Baoquan He <bhe@xxxxxxxxxx>
Suggested-by: Barry Song <baohua@xxxxxxxxxx>
Signed-off-by: Jianyue Wu <wujianyue000@xxxxxxxxx>
---
include/linux/swap.h | 1 -
mm/swap.c | 36 ------------------------------------
mm/swap.h | 8 ++++++--
mm/swap_state.c | 37 +++++++++++++++++++++++++++++++++++++
mm/vmscan.c | 2 +-
5 files changed, 44 insertions(+), 40 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 636d94108166..5bd6f1d5984a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -345,7 +345,6 @@ extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
void folio_deactivate(struct folio *folio);
void folio_mark_lazyfree(struct folio *folio);
-extern void swap_setup(void);
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
diff --git a/mm/swap.c b/mm/swap.c
index 588f50d8f1a8..e4b3dadaa6dc 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -43,10 +43,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/pagemap.h>
-/* How many pages do we try to swap or page in/out together? As a power of 2 */
-int page_cluster;
-static const int page_cluster_max = 31;
-
struct cpu_fbatches {
/*
* The following folio batches are grouped together because they are protected
@@ -1171,35 +1167,3 @@ void lru_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent, int
lruvec_reparent_lru(child_lruvec, parent_lruvec, lru, nid);
}
#endif
-
-static const struct ctl_table swap_sysctl_table[] = {
- {
- .procname = "page-cluster",
- .data = &page_cluster,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = (void *)&page_cluster_max,
- }
-};
-
-/*
- * Perform any setup for the swap system
- */
-void __init swap_setup(void)
-{
- unsigned long megs = PAGES_TO_MB(totalram_pages());
-
- /* Use a smaller cluster for small-memory machines */
- if (megs < 16)
- page_cluster = 2;
- else
- page_cluster = 3;
- /*
- * Right now other parts of the system means that we
- * _really_ don't want to cluster much more
- */
-
- register_sysctl_init("vm", swap_sysctl_table);
-}
diff --git a/mm/swap.h b/mm/swap.h
index 8742b82cd0db..f860f8c669e8 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -9,8 +9,6 @@ struct mempolicy;
struct swap_iocb;
struct swap_memcg_table;
-extern int page_cluster;
-
#if defined(MAX_POSSIBLE_PHYSMEM_BITS)
#define SWAP_CACHE_PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
#elif defined(MAX_PHYSMEM_BITS)
@@ -96,6 +94,8 @@ struct swap_ops {
};
#ifdef CONFIG_SWAP
+void swap_readahead_setup(void);
+
#include <linux/swapops.h> /* for swp_offset */
#include <linux/blk_types.h> /* for bio_end_io_t */
@@ -345,6 +345,10 @@ static inline unsigned int folio_swap_flags(struct folio *folio)
}
#else /* CONFIG_SWAP */
+static inline void swap_readahead_setup(void)
+{
+}
+
static inline struct swap_cluster_info *swap_cluster_lock(
struct swap_info_struct *si, pgoff_t offset, bool irq)
{
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b9613026950e..692dfcd89bcd 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -22,10 +22,15 @@
#include <linux/vmalloc.h>
#include <linux/huge_mm.h>
#include <linux/shmem_fs.h>
+#include <linux/sysctl.h>
#include "internal.h"
#include "swap_table.h"
#include "swap.h"
+/* Swap readahead cluster size, as a power of 2 pages. */
+static int page_cluster;
+static const int page_cluster_max = 31;
+
/*
* swapper_space is a fiction, retained to simplify the path through
* vmscan's shrink_folio_list.
@@ -986,6 +991,38 @@ struct folio *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
return folio;
}
+static const struct ctl_table swap_readahead_sysctl_table[] = {
+ {
+ .procname = "page-cluster",
+ .data = &page_cluster,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = (void *)&page_cluster_max,
+ }
+};
+
+/**
+ * swap_readahead_setup - set the default page_cluster and register vm.page-cluster
+ */
+void __init swap_readahead_setup(void)
+{
+ unsigned long megs = PAGES_TO_MB(totalram_pages());
+
+ /* Small-memory machines (megs < 16) get 2^2 pages per cluster, others 2^3 */
+ if (megs < 16)
+ page_cluster = 2;
+ else
+ page_cluster = 3;
+ /*
+ * Right now other parts of the system mean that we
+ * _really_ don't want to cluster much more
+ */
+
+ register_sysctl_init("vm", swap_readahead_sysctl_table);
+}
+
#ifdef CONFIG_SYSFS
static ssize_t vma_ra_enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2d44ebfebdea..e34f1565f42f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -7651,7 +7651,7 @@ static int __init kswapd_init(void)
{
int nid;
- swap_setup();
+ swap_readahead_setup();
for_each_node_state(nid, N_MEMORY)
kswapd_run(nid);
register_sysctl_init("vm", vmscan_sysctl_table);
--
2.43.0