Re: [PATCH net-next v2 11/15] gve: split up notify block allocation and setup paths

From: Joshua Washington

Date: Sun Jun 07 2026 - 18:29:57 EST

On Tue, Jun 2, 2026 at 4:59 PM Harshitha Ramamurthy
<hramamurthy@xxxxxxxxxx> wrote:
>
> From: Joshua Washington <joshwash@xxxxxxxxxx>
>
> Before this patch, notify block allocation and setup occurred in the same
> method. This all occurred before gve_adminq_configure_device_resources,
> which populates the irq_db_indicies array, a DMA region with BAR offsets
> for MSI-X vectors.
>
> The coming mailbox mode will require notify blocks to be set up only
> after receiving the IRQ doorbell offsets, as the request does not work
> with a supplied DMA buffer in the way that admin queue mode does. The
> intended flow in that case would be:
>
> 1) allocate notify blocks
> 2) request doorbell information
> 3) set up MSI-X vectors based on doorbell info
>
> This ordering also works for admin queue mode, so it will be updated to
> match.
>
> Reviewed-by: Willem de Bruijn <willemb@xxxxxxxxxx>
> Reviewed-by: Jordan Rhee <jordanrhee@xxxxxxxxxx>
> Signed-off-by: Joshua Washington <joshwash@xxxxxxxxxx>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@xxxxxxxxxx>
> ---
> drivers/net/ethernet/google/gve/gve.h | 2 +
> drivers/net/ethernet/google/gve/gve_main.c | 158 ++++++++++++---------
> 2 files changed, 89 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 939b245ed562..e173c14bdc08 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -673,6 +673,7 @@ struct gve_notify_block {
> struct gve_tx_ring *tx; /* tx rings on this block */
> struct gve_rx_ring *rx; /* rx rings on this block */
> u32 irq;
> + bool irq_requested;
> };
>
> /* Tracks allowed and current rx queue settings */
> @@ -953,6 +954,7 @@ struct gve_priv {
> u64 link_speed;
> bool up_before_suspend; /* True if dev was up before suspend */
>
> + bool mgmt_irq_requested;
> struct gve_ptype_lut *ptype_lut_dqo;
>
> /* Must be a power of two. */
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index 06df6d8ad429..98970508ae54 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -425,6 +425,24 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
> return work_done;
> }
>
> +static void gve_free_notify_blocks(struct gve_priv *priv)
> +{
> + pci_disable_msix(priv->pdev);
> + if (priv->irq_db_indices) {
> + dma_free_coherent(&priv->pdev->dev,
> + priv->num_ntfy_blks *
> + sizeof(*priv->irq_db_indices),
> + priv->irq_db_indices,
> + priv->irq_db_indices_bus);
> + priv->irq_db_indices = NULL;
> + }
> +
> + kvfree(priv->ntfy_blocks);
> + priv->ntfy_blocks = NULL;
> + kvfree(priv->msix_vectors);
> + priv->msix_vectors = NULL;
> +}
> +
> static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
> {
> if (priv->numa_node == NUMA_NO_NODE)
> @@ -436,11 +454,9 @@ static const struct cpumask *gve_get_node_mask(struct gve_priv *priv)
> static int gve_alloc_notify_blocks(struct gve_priv *priv)
> {
> int num_vecs_requested = priv->num_ntfy_blks + 1;
> - const struct cpumask *node_mask;
> - unsigned int cur_cpu;
> int vecs_enabled;
> - int i, j;
> int err;
> + int i;
>
> priv->msix_vectors = kvzalloc_objs(*priv->msix_vectors,
> num_vecs_requested);
> @@ -454,7 +470,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
> dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
> GVE_MIN_MSIX, vecs_enabled);
> err = vecs_enabled;
> - goto abort_with_msix_vectors;
> + goto abort;
> }
> if (vecs_enabled != num_vecs_requested) {
> int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
> @@ -477,15 +493,6 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
> priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
> }
>
> - /* Setup Management Vector - the last vector */
> - snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
> - pci_name(priv->pdev));
> - err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
> - gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
> - if (err) {
> - dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
> - goto abort_with_msix_enabled;
> - }
> priv->irq_db_indices =
> dma_alloc_coherent(&priv->pdev->dev,
> priv->num_ntfy_blks *
> @@ -493,15 +500,65 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
> &priv->irq_db_indices_bus, GFP_KERNEL);
> if (!priv->irq_db_indices) {
> err = -ENOMEM;
> - goto abort_with_mgmt_vector;
> + goto abort;
> }
>
> priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
> sizeof(*priv->ntfy_blocks), GFP_KERNEL);
> if (!priv->ntfy_blocks) {
> err = -ENOMEM;
> - goto abort_with_irq_db_indices;
> + goto abort;
> + }
> + return 0;
> +
> +abort:
> + gve_free_notify_blocks(priv);
> + return err;
> +}
> +
> +static void gve_teardown_notify_blocks(struct gve_priv *priv)
> +{
> + int i;
> +
> + if (!priv->ntfy_blocks)
> + return;
> +
> + for (i = 0; i < priv->num_ntfy_blks; i++) {
> + struct gve_notify_block *block = &priv->ntfy_blocks[i];
> +
> + if (!block->irq_requested)
> + continue;
> +
> + irq_set_affinity_hint(priv->msix_vectors[i].vector,
> + NULL);
> + free_irq(priv->msix_vectors[i].vector, block);
> + block->irq = 0;
> + block->irq_requested = false;
> + }
> +
> + if (priv->mgmt_irq_requested) {
> + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
> + priv->mgmt_irq_requested = false;
> + }
> +}
> +
> +static int gve_setup_notify_blocks(struct gve_priv *priv)
> +{
> + const struct cpumask *node_mask;
> + unsigned int cur_cpu;
> + int i;
> + int err;
> +
> + /* Setup Management Vector - the last vector */
> + snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name),
> + "gve-mgmnt@pci:%s", pci_name(priv->pdev));
> + err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
> + gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
> + if (err) {
> + dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
> + return err;
> }
> + priv->mgmt_irq_requested = true;
>
> /* Setup the other blocks - the first n-1 vectors */
> node_mask = gve_get_node_mask(priv);
> @@ -519,9 +576,10 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
> if (err) {
> dev_err(&priv->pdev->dev,
> "Failed to receive msix vector %d\n", i);
> - goto abort_with_some_ntfy_blocks;
> + goto abort;
> }
> block->irq = priv->msix_vectors[msix_idx].vector;
> + block->irq_requested = true;
> irq_set_affinity_and_hint(block->irq,
> cpumask_of(cur_cpu));
> block->irq_db_index = &priv->irq_db_indices[i].index;
> @@ -535,61 +593,12 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
> cur_cpu = cpumask_first(node_mask);
> }
> return 0;
> -abort_with_some_ntfy_blocks:
> - for (j = 0; j < i; j++) {
> - struct gve_notify_block *block = &priv->ntfy_blocks[j];
> - int msix_idx = j;
>
> - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
> - NULL);
> - free_irq(priv->msix_vectors[msix_idx].vector, block);
> - block->irq = 0;
> - }
> - kvfree(priv->ntfy_blocks);
> - priv->ntfy_blocks = NULL;
> -abort_with_irq_db_indices:
> - dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
> - sizeof(*priv->irq_db_indices),
> - priv->irq_db_indices, priv->irq_db_indices_bus);
> - priv->irq_db_indices = NULL;
> -abort_with_mgmt_vector:
> - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
> -abort_with_msix_enabled:
> - pci_disable_msix(priv->pdev);
> -abort_with_msix_vectors:
> - kvfree(priv->msix_vectors);
> - priv->msix_vectors = NULL;
> +abort:
> + gve_teardown_notify_blocks(priv);
> return err;
> }
>
> -static void gve_free_notify_blocks(struct gve_priv *priv)
> -{
> - int i;
> -
> - if (!priv->msix_vectors)
> - return;
> -
> - /* Free the irqs */
> - for (i = 0; i < priv->num_ntfy_blks; i++) {
> - struct gve_notify_block *block = &priv->ntfy_blocks[i];
> - int msix_idx = i;
> -
> - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
> - NULL);
> - free_irq(priv->msix_vectors[msix_idx].vector, block);
> - block->irq = 0;
> - }
> - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
> - kvfree(priv->ntfy_blocks);
> - priv->ntfy_blocks = NULL;
> - dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
> - sizeof(*priv->irq_db_indices),
> - priv->irq_db_indices, priv->irq_db_indices_bus);
> - priv->irq_db_indices = NULL;
> - pci_disable_msix(priv->pdev);
> - kvfree(priv->msix_vectors);
> - priv->msix_vectors = NULL;
> -}
>
> static void gve_free_control_plane_resources(struct gve_priv *priv)
> {
> @@ -743,6 +752,7 @@ static void gve_teardown_control_plane_resources(struct gve_priv *priv)
>
> static void gve_teardown_device(struct gve_priv *priv)
> {
> + gve_teardown_notify_blocks(priv);

Sashiko says:
In gve_remove(), the patch series reordered the teardown sequence to call
destroy_workqueue() before gve_teardown_device().
Because gve_teardown_device() (via gve_teardown_notify_blocks()) is what
calls free_irq() to release the management interrupt, the interrupt remains
active while and after the workqueue is destroyed.
If a management interrupt fires during this window, gve_mgmnt_intr() will
[queue the service task on the work queue].
Can this sequence trigger a use-after-free by enqueuing work on the destroyed
workqueue?

This will be fixed by the disable_work() change in patch 9.

> gve_teardown_control_plane_resources(priv);
> gve_adminq_free(priv);
> /*
> @@ -2463,13 +2473,16 @@ static int gve_setup_device(struct gve_priv *priv)
>
> err = gve_alloc_control_plane_resources(priv);
> if (err)
> - goto err;
> + return err;
> +
> err = gve_setup_control_plane_resources(priv);
> if (err)
> - goto err;
> + return err;
> +
> + err = gve_setup_notify_blocks(priv);
> + if (err)
> + return err;
> return 0;
> -err:
> - return err;
> }
>
> static const struct gve_ctrl_ops gve_adminq_ops = {
> @@ -2591,6 +2604,9 @@ int gve_reset(struct gve_priv *priv, bool skip_queue_setup)
> gve_unregister_qpls(priv);
> }
>
> + gve_teardown_notify_blocks(priv);
> + gve_teardown_control_plane_resources(priv);
> +

Sashiko says:
...
The newly added gve_teardown_control_plane_resources(priv) call already
invokes gve_teardown_clock() internally when device_resources_ok is
set, so is the standalone gve_teardown_clock(priv) call further down
now redundant?

Will remove the redundant call.

> /* Reset the device by releasing the AQ. Rings and other resources
> * within the NIC are implicitly destroyed if commands fail.
> */
> --
> 2.54.0.1013.g208068f2d8-goog
>

--

Joshua Washington | Software Engineer | joshwash@xxxxxxxxxx | (414) 366-4423