Re: [PATCH v3 2/3] mm,memory_hotplug: Implement numa node notifier

From: ALOK TIWARI
Date: Fri May 02 2025 - 15:10:37 EST




- node_states_set_node(nid, &arg);
+ node_states_set_node(nid, &node_arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
@@ -1245,16 +1252,26 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
kswapd_run(nid);
kcompactd_run(nid);
+ if (node_arg.status_change_nid >= 0)
+ /*
+ * Node went from memoryless to have memory. Notifiy interested

typo Notifiy -> Notify
"to have memory" -> "to having memory"

+ * consumers
+ */
+ node_notify(NODE_BECAME_MEM_AWARE, &node_arg);
+
writeback_set_ratelimit();
- memory_notify(MEM_ONLINE, &arg);
+ memory_notify(MEM_ONLINE, &mem_arg);
return 0;
failed_addition:
pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
- memory_notify(MEM_CANCEL_ONLINE, &arg);
+ if (cancel_mem_notifier_on_err)
+ memory_notify(MEM_CANCEL_ONLINE, &mem_arg);
+ if (cancel_node_notifier_on_err)
+ node_notify(NODE_CANCEL_MEM_AWARE, &node_arg);
remove_pfn_range_from_zone(zone, pfn, nr_pages);
return ret;
}
@@ -1888,48 +1905,29 @@ early_param("movable_node", cmdline_parse_movable_node);
/* check which state of node_states will be changed when offline memory */
static void node_states_check_changes_offline(unsigned long nr_pages,
- struct zone *zone, struct memory_notify *arg)
+ struct zone *zone, struct node_notify *arg)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long present_pages = 0;
enum zone_type zt;
arg->status_change_nid = NUMA_NO_NODE;
- arg->status_change_nid_normal = NUMA_NO_NODE;
/*
- * Check whether node_states[N_NORMAL_MEMORY] will be changed.
- * If the memory to be offline is within the range
- * [0..ZONE_NORMAL], and it is the last present memory there,
- * the zones in that range will become empty after the offlining,
- * thus we can determine that we need to clear the node from
- * node_states[N_NORMAL_MEMORY].
+ * Here we count the possible pages within the range [0..ZONE_MOVABLE].
+ * If after having accounted all the pages, we see that the nr_pages to
+ * be offlined is over or equal to the accounted pages, we know that the
+ * node will become empty, ans so, we can clear it for N_MEMORY.

typo ans -> and
"over or equal to the accounted" -> "greater than or equal to the accounted"

*/
- for (zt = 0; zt <= ZONE_NORMAL; zt++)
+ for (zt = 0; zt <= ZONE_MOVABLE; zt++)
present_pages += pgdat->node_zones[zt].present_pages;
- if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages)
- arg->status_change_nid_normal = zone_to_nid(zone);
-
- /*
- * We have accounted the pages from [0..ZONE_NORMAL); ZONE_HIGHMEM
- * does not apply as we don't support 32bit.
- * Here we count the possible pages from ZONE_MOVABLE.
- * If after having accounted all the pages, we see that the nr_pages
- * to be offlined is over or equal to the accounted pages,
- * we know that the node will become empty, and so, we can clear
- * it for N_MEMORY as well.
- */
- present_pages += pgdat->node_zones[ZONE_MOVABLE].present_pages;
if (nr_pages >= present_pages)
arg->status_change_nid = zone_to_nid(zone);
}
-static void node_states_clear_node(int node, struct memory_notify *arg)
+static void node_states_clear_node(int node, struct node_notify *arg)
{
- if (arg->status_change_nid_normal >= 0)
- node_clear_state(node, N_NORMAL_MEMORY);
-
if (arg->status_change_nid >= 0)
node_clear_state(node, N_MEMORY);
}
@@ -1953,7 +1951,9 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
unsigned long pfn, managed_pages, system_ram_pages = 0;
const int node = zone_to_nid(zone);
unsigned long flags;
- struct memory_notify arg;
+ struct memory_notify mem_arg;
+ struct node_notify node_arg;
+ bool cancel_mem_notifier_on_err = false, cancel_node_notifier_on_err = false;
char *reason;
int ret;
@@ -2012,11 +2012,21 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
goto failed_removal_pcplists_disabled;
}
- arg.start_pfn = start_pfn;
- arg.nr_pages = nr_pages;
- node_states_check_changes_offline(nr_pages, zone, &arg);
+ mem_arg.start_pfn = start_pfn;
+ mem_arg.nr_pages = nr_pages;
+ node_states_check_changes_offline(nr_pages, zone, &node_arg);
+
+ if (node_arg.status_change_nid >= 0) {
+ cancel_node_notifier_on_err = true;
+ ret = node_notify(NODE_BECOMING_MEMORYLESS, &node_arg);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto failed_removal_isolated;
+ }
- ret = memory_notify(MEM_GOING_OFFLINE, &arg);
+ cancel_mem_notifier_on_err = true;
+ mem_arg.status_change_nid = node_arg.status_change_nid;
+ ret = memory_notify(MEM_GOING_OFFLINE, &mem_arg);
ret = notifier_to_errno(ret);
if (ret) {
reason = "notifier failure";
@@ -2096,27 +2106,32 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
* Make sure to mark the node as memory-less before rebuilding the zone
* list. Otherwise this node would still appear in the fallback lists.
*/
- node_states_clear_node(node, &arg);
+ node_states_clear_node(node, &node_arg);
if (!populated_zone(zone)) {
zone_pcp_reset(zone);
build_all_zonelists(NULL);
}
- if (arg.status_change_nid >= 0) {
+ if (node_arg.status_change_nid >= 0) {
kcompactd_stop(node);
kswapd_stop(node);
+ /* Node went memoryless. Notifiy interested consumers */

typo Notifiy -> Notify

+ node_notify(NODE_BECAME_MEMORYLESS, &node_arg);
}
writeback_set_ratelimit();
- memory_notify(MEM_OFFLINE, &arg);
+ memory_notify(MEM_OFFLINE, &mem_arg);
remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
return 0;
failed_removal_isolated:
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
- memory_notify(MEM_CANCEL_OFFLINE, &arg);
+ if (cancel_mem_notifier_on_err)
+ memory_notify(MEM_CANCEL_OFFLINE, &mem_arg);
+ if (cancel_node_notifier_on_err)
+ node_notify(NODE_CANCEL_MEMORYLESS, &node_arg);
failed_removal_pcplists_disabled:
lru_cache_enable();
zone_pcp_enable(zone);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f43951668c41..b3ad63fb3a2b 100644

Thanks,
Alok