[PATCH net-next 09/15] net/mlx5: LAG, store demux resources per master lag_func
From: Tariq Toukan
Date: Thu Jun 04 2026 - 07:51:30 EST
From: Shay Drory <shayd@xxxxxxxxxx>
The lag demux resources (flow table, flow group, and rules xarray)
are stored on the shared ldev. With Socket Direct, multiple SD groups
each create their own demux FT/FG during their master's IB device
initialization. Since they all write to the same ldev fields, the
second group's init overwrites the first group's pointers, leaking
the first group's FT/FG.
During teardown, the cleanup uses the overwritten pointers, destroying
the wrong group's resources and leaving leaked flow tables in the LAG
namespace. These leaked tables can interfere with subsequently created
demux tables.
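Move the demux resources from the shared ldev to per-master lag_func
instances. Each master device now owns its own independent demux
state. mlx5_lag_demux_init() and mlx5_lag_demux_cleanup() operate on
the lag_func of the device they are called with, obtained through
mlx5_lag_pf_by_dev(). mlx5_lag_demux_rule_add() and
mlx5_lag_demux_rule_del() look up the appropriate master's lag_func
via the existing filter/group infrastructure, i.e.
mlx5_lag_get_filter() followed by
mlx5_lag_get_dev_index_by_seq_filter() for MLX5_LAG_P1.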
Signed-off-by: Shay Drory <shayd@xxxxxxxxxx>
Reviewed-by: Mark Bloch <mbloch@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/lag/lag.c | 95 +++++++++++++------
.../net/ethernet/mellanox/mlx5/core/lag/lag.h | 7 +-
2 files changed, 68 insertions(+), 34 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index dd3f18f85466..e23c1e81b98f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1590,7 +1590,7 @@ struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
struct mlx5_flow_table_attr *ft_attr,
- struct mlx5_lag *ldev)
+ struct lag_func *pf)
{
#ifdef CONFIG_MLX5_ESWITCH
struct mlx5_flow_namespace *ns;
@@ -1601,20 +1601,20 @@ static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
if (!ns)
return 0;
- ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
- if (IS_ERR(ldev->lag_demux_ft))
- return PTR_ERR(ldev->lag_demux_ft);
+ pf->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
+ if (IS_ERR(pf->lag_demux_ft))
+ return PTR_ERR(pf->lag_demux_ft);
fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
- ldev->lag_demux_ft);
+ pf->lag_demux_ft);
if (IS_ERR(fg)) {
err = PTR_ERR(fg);
- mlx5_destroy_flow_table(ldev->lag_demux_ft);
- ldev->lag_demux_ft = NULL;
+ mlx5_destroy_flow_table(pf->lag_demux_ft);
+ pf->lag_demux_ft = NULL;
return err;
}
- ldev->lag_demux_fg = fg;
+ pf->lag_demux_fg = fg;
return 0;
#else
return -EOPNOTSUPP;
@@ -1623,7 +1623,7 @@ static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
struct mlx5_flow_table_attr *ft_attr,
- struct mlx5_lag *ldev)
+ struct lag_func *pf)
{
struct mlx5_flow_namespace *ns;
int err;
@@ -1632,12 +1632,12 @@ static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
if (!ns)
return 0;
- ldev->lag_demux_fg = NULL;
+ pf->lag_demux_fg = NULL;
ft_attr->max_fte = 1;
- ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
- if (IS_ERR(ldev->lag_demux_ft)) {
- err = PTR_ERR(ldev->lag_demux_ft);
- ldev->lag_demux_ft = NULL;
+ pf->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
+ if (IS_ERR(pf->lag_demux_ft)) {
+ err = PTR_ERR(pf->lag_demux_ft);
+ pf->lag_demux_ft = NULL;
return err;
}
@@ -1648,6 +1648,7 @@ int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
struct mlx5_flow_table_attr *ft_attr)
{
struct mlx5_lag *ldev;
+ struct lag_func *pf;
if (!ft_attr)
return -EINVAL;
@@ -1656,12 +1657,16 @@ int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
if (!ldev)
return -ENODEV;
- xa_init(&ldev->lag_demux_rules);
+ pf = mlx5_lag_pf_by_dev(ldev, dev);
+ if (!pf)
+ return -ENODEV;
+
+ xa_init(&pf->lag_demux_rules);
if (mlx5_get_sd(dev))
- return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
+ return mlx5_lag_demux_ft_fg_init(dev, ft_attr, pf);
- return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
+ return mlx5_lag_demux_fw_init(dev, ft_attr, pf);
}
EXPORT_SYMBOL(mlx5_lag_demux_init);
@@ -1670,40 +1675,63 @@ void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
struct mlx5_flow_handle *rule;
struct mlx5_lag *ldev;
unsigned long vport_num;
+ struct lag_func *pf;
ldev = mlx5_lag_dev(dev);
if (!ldev)
return;
- xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
+ pf = mlx5_lag_pf_by_dev(ldev, dev);
+ if (!pf)
+ return;
+
+ xa_for_each(&pf->lag_demux_rules, vport_num, rule)
mlx5_del_flow_rules(rule);
- xa_destroy(&ldev->lag_demux_rules);
+ xa_destroy(&pf->lag_demux_rules);
- if (ldev->lag_demux_fg)
- mlx5_destroy_flow_group(ldev->lag_demux_fg);
- if (ldev->lag_demux_ft)
- mlx5_destroy_flow_table(ldev->lag_demux_ft);
- ldev->lag_demux_fg = NULL;
- ldev->lag_demux_ft = NULL;
+ if (pf->lag_demux_fg)
+ mlx5_destroy_flow_group(pf->lag_demux_fg);
+ if (pf->lag_demux_ft)
+ mlx5_destroy_flow_table(pf->lag_demux_ft);
+ pf->lag_demux_fg = NULL;
+ pf->lag_demux_ft = NULL;
}
EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
+static struct lag_func *mlx5_lag_dev_get_master_pf(struct mlx5_lag *ldev,
+ struct mlx5_core_dev *dev)
+{
+ u32 filter = mlx5_lag_get_filter(ldev, dev);
+ int idx;
+
+ idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1, filter);
+ if (idx < 0)
+ return NULL;
+
+ return mlx5_lag_pf(ldev, idx);
+}
+
int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
int index)
{
struct mlx5_flow_handle *rule;
+ struct lag_func *master;
struct mlx5_lag *ldev;
int err;
ldev = mlx5_lag_dev(vport_dev);
- if (!ldev || !ldev->lag_demux_fg)
+ if (!ldev)
return 0;
- if (xa_load(&ldev->lag_demux_rules, index))
+ master = mlx5_lag_dev_get_master_pf(ldev, vport_dev);
+ if (!master || !master->lag_demux_fg)
+ return 0;
+
+ if (xa_load(&master->lag_demux_rules, index))
return 0;
rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
- vport_num, ldev->lag_demux_ft);
+ vport_num, master->lag_demux_ft);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
mlx5_core_warn(vport_dev,
@@ -1712,7 +1740,7 @@ int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
return err;
}
- err = xa_err(xa_store(&ldev->lag_demux_rules, index, rule,
+ err = xa_err(xa_store(&master->lag_demux_rules, index, rule,
GFP_KERNEL));
if (err) {
mlx5_del_flow_rules(rule);
@@ -1728,13 +1756,18 @@ EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
{
struct mlx5_flow_handle *rule;
+ struct lag_func *master_pf;
struct mlx5_lag *ldev;
ldev = mlx5_lag_dev(dev);
- if (!ldev || !ldev->lag_demux_fg)
+ if (!ldev)
+ return;
+
+ master_pf = mlx5_lag_dev_get_master_pf(ldev, dev);
+ if (!master_pf || !master_pf->lag_demux_fg)
return;
- rule = xa_erase(&ldev->lag_demux_rules, index);
+ rule = xa_erase(&master_pf->lag_demux_rules, index);
if (rule)
mlx5_del_flow_rules(rule);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 0296f752bb4c..c689f1951cd8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -59,6 +59,10 @@ struct lag_func {
struct mlx5_nb port_change_nb;
u32 group_id; /* SD group ID, 0 = not SD */
bool sd_fdb_active; /* set on all SD group members */
+ /* Lag demux resources - only populated on master devices */
+ struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_flow_group *lag_demux_fg;
+ struct xarray lag_demux_rules;
};
/* Used for collection of netdev event info. */
@@ -95,9 +99,6 @@ struct mlx5_lag {
/* Protect lag fields/state changes */
struct mutex lock;
struct lag_mpesw lag_mpesw;
- struct mlx5_flow_table *lag_demux_ft;
- struct mlx5_flow_group *lag_demux_fg;
- struct xarray lag_demux_rules;
};
static inline struct mlx5_lag *
--
2.44.0