[PATCH] mm: skip dirty file folios during isolation of legacy LRU

From: zhaoyang.huang

Date: Fri Mar 20 2026 - 04:34:38 EST


From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>

Since dirty file folios are no longer written out during reclaim after
commit 84798514db50 ("mm: Remove swap_writepage() and
shmem_writepage()"), there is no need to isolate them. Skipping them
improves scan efficiency and avoids unnecessary TLB flushes.
This commit moves the dirty file folio detection forward to the
isolation phase, together with the statistics that decide whether to
wake up the flusher threads under the legacy LRU. For MGLRU, dirty file
folios are already moved to a younger generation in sort_folio().

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
---
mm/vmscan.c | 103 ++++++++++++++++++++++++++++------------------------
1 file changed, 55 insertions(+), 48 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 10f1e7d716ca..79e5910ac62e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1103,7 +1103,6 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
struct address_space *mapping;
struct folio *folio;
enum folio_references references = FOLIOREF_RECLAIM;
- bool dirty, writeback;
unsigned int nr_pages;

cond_resched();
@@ -1142,26 +1141,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
if (!sc->may_unmap && folio_mapped(folio))
goto keep_locked;

- /*
- * The number of dirty pages determines if a node is marked
- * reclaim_congested. kswapd will stall and start writing
- * folios if the tail of the LRU is all dirty unqueued folios.
- */
- folio_check_dirty_writeback(folio, &dirty, &writeback);
- if (dirty || writeback)
- stat->nr_dirty += nr_pages;

- if (dirty && !writeback)
- stat->nr_unqueued_dirty += nr_pages;
-
- /*
- * Treat this folio as congested if folios are cycling
- * through the LRU so quickly that the folios marked
- * for immediate reclaim are making it to the end of
- * the LRU a second time.
- */
- if (writeback && folio_test_reclaim(folio))
- stat->nr_congested += nr_pages;

/*
* If a folio at the tail of the LRU is under writeback, there
@@ -1717,12 +1697,14 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
unsigned long skipped = 0, total_scan = 0, scan = 0;
+ unsigned long nr_dirty = 0, nr_unqueued_dirty = 0, nr_congested = 0;
unsigned long nr_pages;
unsigned long max_nr_skipped = 0;
LIST_HEAD(folios_skipped);

while (scan < nr_to_scan && !list_empty(src)) {
struct list_head *move_to = src;
+ bool dirty, writeback;
struct folio *folio;

folio = lru_to_folio(src);
@@ -1749,6 +1731,30 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
*/
scan += nr_pages;

+ if (!folio_trylock(folio))
+ goto move;
+ /*
+ * The number of dirty pages determines if a node is marked
+ * reclaim_congested. kswapd will stall and start writing
+ * folios if the tail of the LRU is all dirty unqueued folios.
+ */
+ folio_check_dirty_writeback(folio, &dirty, &writeback);
+ folio_unlock(folio);
+
+ if (dirty || writeback)
+ nr_dirty += nr_pages;
+
+ if (dirty && !writeback)
+ nr_unqueued_dirty += nr_pages;
+ /*
+ * Treat this folio as congested if folios are cycling
+ * through the LRU so quickly that the folios marked
+ * for immediate reclaim are making it to the end of
+ * the LRU a second time.
+ */
+ if (writeback && folio_test_reclaim(folio))
+ nr_congested += nr_pages;
+
if (!folio_test_lru(folio))
goto move;
if (!sc->may_unmap && folio_mapped(folio))
@@ -1798,6 +1804,35 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
total_scan, skipped, nr_taken, lru);
update_lru_sizes(lruvec, lru, nr_zone_taken);
+ /*
+ * If dirty folios are scanned that are not queued for IO, it
+ * implies that flushers are not doing their job. This can
+ * happen when memory pressure pushes dirty folios to the end of
+ * the LRU before the dirty limits are breached and the dirty
+ * data has expired. It can also happen when the proportion of
+ * dirty folios grows not through writes but through memory
+ * pressure reclaiming all the clean cache. And in some cases,
+ * the flushers simply cannot keep up with the allocation
+ * rate. Nudge the flusher threads in case they are asleep.
+ */
+ if (nr_unqueued_dirty == scan) {
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
+ /*
+ * For cgroupv1 dirty throttling is achieved by waking up
+ * the kernel flusher here and later waiting on folios
+ * which are in writeback to finish (see shrink_folio_list()).
+ *
+ * Flusher may not be able to issue writeback quickly
+ * enough for cgroupv1 writeback throttling to work
+ * on a large system.
+ */
+ if (!writeback_throttling_sane(sc))
+ reclaim_throttle(lruvec_pgdat(lruvec), VMSCAN_THROTTLE_WRITEBACK);
+ }
+ sc->nr.dirty += nr_dirty;
+ sc->nr.congested += nr_congested;
+ sc->nr.unqueued_dirty += nr_unqueued_dirty;
+
return nr_taken;
}

@@ -2038,35 +2073,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
nr_scanned - nr_reclaimed);

- /*
- * If dirty folios are scanned that are not queued for IO, it
- * implies that flushers are not doing their job. This can
- * happen when memory pressure pushes dirty folios to the end of
- * the LRU before the dirty limits are breached and the dirty
- * data has expired. It can also happen when the proportion of
- * dirty folios grows not through writes but through memory
- * pressure reclaiming all the clean cache. And in some cases,
- * the flushers simply cannot keep up with the allocation
- * rate. Nudge the flusher threads in case they are asleep.
- */
- if (stat.nr_unqueued_dirty == nr_taken) {
- wakeup_flusher_threads(WB_REASON_VMSCAN);
- /*
- * For cgroupv1 dirty throttling is achieved by waking up
- * the kernel flusher here and later waiting on folios
- * which are in writeback to finish (see shrink_folio_list()).
- *
- * Flusher may not be able to issue writeback quickly
- * enough for cgroupv1 writeback throttling to work
- * on a large system.
- */
- if (!writeback_throttling_sane(sc))
- reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
- }

- sc->nr.dirty += stat.nr_dirty;
- sc->nr.congested += stat.nr_congested;
- sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
sc->nr.writeback += stat.nr_writeback;
sc->nr.immediate += stat.nr_immediate;
sc->nr.taken += nr_taken;
--
2.25.1