[PATCH net-next 14/15] net/mlx5: SD, defer vport metadata init until SD is ready

From: Tariq Toukan

Date: Thu Jun 04 2026 - 07:54:42 EST


From: Shay Drory <shayd@xxxxxxxxxx>

Allow SD devices to transition to switchdev before the SD group is
fully up. Metadata allocation requires the SD group to be ready, so
defer it from esw_offloads_enable() until SD shared-FDB activation.

Add mlx5_esw_offloads_init_deferred_metadata() which allocates
per-vport metadata and refreshes the manager ingress ACLs that were
previously programmed with metadata=0. The helper is idempotent and
can be called multiple times.

Signed-off-by: Shay Drory <shayd@xxxxxxxxxx>
Reviewed-by: Mark Bloch <mbloch@xxxxxxxxxx>
Signed-off-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 +
.../mellanox/mlx5/core/eswitch_offloads.c | 46 ++++++++++++++++++-
.../net/ethernet/mellanox/mlx5/core/lib/sd.c | 16 +++++++
3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index a5f0774834fe..ecf6a28a1c08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -440,6 +440,7 @@ struct mlx5_eswitch {

void esw_offloads_disable(struct mlx5_eswitch *esw);
int esw_offloads_enable(struct mlx5_eswitch *esw);
+int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw);
void esw_offloads_cleanup(struct mlx5_eswitch *esw);
int esw_offloads_init(struct mlx5_eswitch *esw);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 4d3f80bd6af0..503530b0acba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,6 +43,7 @@
#include "esw/acl/ofld.h"
#include "rdma.h"
#include "en.h"
+#include "en_rep.h"
#include "fs_core.h"
#include "lib/mlx5.h"
#include "lib/devcom.h"
@@ -3675,6 +3676,7 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,

WARN_ON(vport->metadata != vport->default_metadata);
mlx5_esw_match_metadata_free(esw, vport->default_metadata);
+ vport->default_metadata = 0;
}

static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw)
@@ -3711,6 +3713,38 @@ static int esw_offloads_metadata_init(struct mlx5_eswitch *esw)
return err;
}

+/* Deferred vport metadata init for SD devices.
+ *
+ * Runs esw_offloads_metadata_init() and then re-programs the manager
+ * vport ingress ACL, which was set up before any metadata existed.
+ *
+ * Safe to call multiple times: once the manager vport carries a non-zero
+ * default_metadata, subsequent calls are no-ops. Also a no-op when vport
+ * match metadata is not enabled on @esw.
+ *
+ * Return: 0 on success or when there is nothing to do; otherwise the
+ * error from the manager vport lookup, from esw_offloads_metadata_init()
+ * or from the manager ingress ACL refresh.
+ */
+int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw)
+{
+	struct mlx5_vport *manager;
+	int err;
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw))
+		return 0;
+
+	manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+	if (IS_ERR(manager))
+		return PTR_ERR(manager);
+
+	/* Sanity check: skip if metadata was already initialized */
+	if (manager->default_metadata)
+		return 0;
+
+	err = esw_offloads_metadata_init(esw);
+	if (err)
+		return err;
+
+	/* Manager vport doesn't have a rep/netdev loaded but its ingress ACL
+	 * was programmed with metadata=0 in esw_create_offloads_acl_tables() -
+	 * refresh it explicitly. Propagate a failure instead of dropping it,
+	 * so the caller does not carry on with a stale manager ACL.
+	 *
+	 * NOTE(review): the metadata set up just above is not rolled back on
+	 * this failure, so a later call takes the early return above without
+	 * retrying the refresh - confirm whether an unwind is wanted here.
+	 */
+	return mlx5_esw_acl_ingress_vport_metadata_update(esw,
+							  esw->manager_vport,
+							  0);
+}
+
+
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport)
@@ -4053,7 +4087,17 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
if (err)
goto err_roce;

- err = esw_offloads_metadata_init(esw);
+ /* SD devices defer metadata init until SD is ready and
+ * mlx5_sd_pf_num_get() can return the correct pf_num.
+ */
+ if (!mlx5_get_sd(esw->dev)) {
+ err = esw_offloads_metadata_init(esw);
+ } else if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ struct mlx5_vport *uplink =
+ mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+
+ err = esw_offloads_vport_metadata_setup(esw, uplink);
+ }
if (err)
goto err_metadata;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index d2ed156ed1c6..82ae8c3969fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -988,6 +988,7 @@ static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary)
static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
{
struct mlx5_sd *sd = mlx5_get_sd(primary);
+ struct mlx5_core_dev *pos;
struct mlx5_lag *ldev;
struct lag_func *pf;
int err;
@@ -1016,6 +1017,21 @@ static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
goto unlock;
}

+ /* Initialize vport metadata for all group devices. This is deferred
+ * from esw_offloads_enable() because mlx5_sd_pf_num_get() requires
+ * the SD group to be ready.
+ */
+ mlx5_sd_for_each_dev(i, primary, pos) {
+ struct mlx5_eswitch *esw = pos->priv.eswitch;
+
+ err = mlx5_esw_offloads_init_deferred_metadata(esw);
+ if (err) {
+ sd_warn(primary, "Failed to init metadata for %s: %d\n",
+ dev_name(pos->device), err);
+ goto unlock;
+ }
+ }
+
err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id);
if (err)
sd_warn(primary, "Failed to create shared FDB: %d\n", err);
--
2.44.0