Re: [PATCH v2] mm/vmalloc: use dedicated unbound workqueue for vmap area draining

From: Uladzislau Rezki

Date: Thu Mar 19 2026 - 05:40:25 EST


On Thu, Mar 19, 2026 at 03:43:07AM -0400, lirongqing wrote:
> From: Li RongQing <lirongqing@xxxxxxxxx>
>
> The drain_vmap_area_work() function can take >10ms to complete when
> there are many accumulated vmap areas in a system with a high CPU
> count, causing workqueue watchdog warnings when run via
> schedule_work():
>
> [ 2069.796205] workqueue: drain_vmap_area_work hogged CPU for >10000us 4 times, consider switching to WQ_UNBOUND
> [ 2192.823225] workqueue: drain_vmap_area_work hogged CPU for >10000us 5 times, consider switching to WQ_UNBOUND
>
> Switch to a dedicated WQ_UNBOUND workqueue to allow the scheduler to
> run this background task on any available CPU, improving responsiveness.
> Use WQ_MEM_RECLAIM to ensure forward progress under memory pressure.
>
> Create vmap_drain_wq in vmalloc_init_late() which is called after
> workqueue_init_early() in start_kernel() to avoid boot-time crashes.
>
> Suggested-by: Uladzislau Rezki <urezki@xxxxxxxxx>
> Signed-off-by: Li RongQing <lirongqing@xxxxxxxxx>
> ---
> Diff with v1: create dedicated unbound workqueue
>
> include/linux/vmalloc.h | 2 ++
> init/main.c | 1 +
> mm/vmalloc.c | 14 +++++++++++++-
> 3 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index e8e94f9..c028603 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -301,11 +301,13 @@ static inline void set_vm_flush_reset_perms(void *addr)
> if (vm)
> vm->flags |= VM_FLUSH_RESET_PERMS;
> }
> +void __init vmalloc_init_late(void);
> #else /* !CONFIG_MMU */
> #define VMALLOC_TOTAL 0UL
>
> static inline unsigned long vmalloc_nr_pages(void) { return 0; }
> static inline void set_vm_flush_reset_perms(void *addr) {}
> +static inline void __init vmalloc_init_late(void) {}
> #endif /* CONFIG_MMU */
>
> #if defined(CONFIG_MMU) && defined(CONFIG_SMP)
> diff --git a/init/main.c b/init/main.c
> index 1cb395d..50b497f 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -1099,6 +1099,7 @@ void start_kernel(void)
> * workqueue_init().
> */
> workqueue_init_early();
> + vmalloc_init_late();
>
No, no. We should not patch main.c for such a purpose :)

> rcu_init();
> kvfree_rcu_init();
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 61caa55..a52ccd4 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1067,6 +1067,7 @@ static void reclaim_and_purge_vmap_areas(void);
> static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
> static void drain_vmap_area_work(struct work_struct *work);
> static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
> +static struct workqueue_struct *vmap_drain_wq;
>
> static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
> static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
> @@ -2471,7 +2472,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)
>
> /* After this point, we may free va at any time */
> if (unlikely(nr_lazy > nr_lazy_max))
> - schedule_work(&drain_vmap_work);
> + queue_work(vmap_drain_wq, &drain_vmap_work);
> }
>
> /*
> @@ -5422,6 +5423,17 @@ vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> return SHRINK_STOP;
> }
>
> +void __init vmalloc_init_late(void)
> +{
> + vmap_drain_wq = alloc_workqueue("vmap_drain",
> + WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
> + if (!vmap_drain_wq) {
> + pr_warn("vmap_drain_wq creation failed, using system_unbound_wq\n");
> + vmap_drain_wq = system_unbound_wq;
> + }
> +
> +}
> +
> void __init vmalloc_init(void)
> {
> struct shrinker *vmap_node_shrinker;
> --
> 2.9.4
>
Why can't you add this into vmalloc_init()?

--
Uladzislau Rezki