Re: [PATCH net-next v2 15/15] gve: add link status/speed ctrl ops
From: Joshua Washington
Date: Sun Jun 07 2026 - 18:46:09 EST
On Tue, Jun 2, 2026 at 4:59 PM Harshitha Ramamurthy
<hramamurthy@xxxxxxxxxx> wrote:
>
> From: Joshua Washington <joshwash@xxxxxxxxxx>
>
> Refactor link status check to use a control plane op. Introduce
> new op for retrieving the link status in AQ mode. This op reads
> the link status from the device status register and stores the
> value in priv.
>
> Also add an op for retrieving link speed in AQ mode which calls into
> gve_adminq_report_link_speed.
>
> Reviewed-by: Willem de Bruijn <willemb@xxxxxxxxxx>
> Reviewed-by: Jordan Rhee <jordanrhee@xxxxxxxxxx>
> Signed-off-by: Joshua Washington <joshwash@xxxxxxxxxx>
> Signed-off-by: Harshitha Ramamurthy <hramamurthy@xxxxxxxxxx>
> ---
> drivers/net/ethernet/google/gve/gve.h | 5 ++++
> drivers/net/ethernet/google/gve/gve_adminq.c | 9 ++++++++
> drivers/net/ethernet/google/gve/gve_adminq.h | 1 +
> drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +--
> drivers/net/ethernet/google/gve/gve_main.c | 23 +++++++++++--------
> 5 files changed, 29 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 4283cb9b49de..c8ebbbb93942 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -843,6 +843,8 @@ struct gve_device_info {
> * @reset_flow_rules: Flush all flow rules from device
> * @create_queues: Sends commands to the device to create TX/RX queues.
> * @destroy_queues: Sends commands to the device to destroy TX/RX queues.
> + * @report_link_status: Set link status into @priv->link_up
> + * @report_link_speed: Set link status into @priv->link_speed
Sashiko says:
Should this read "Set link speed into @priv->link_speed" instead? ...
Will fix in v3.
> */
> struct gve_ctrl_ops {
> int (*map_db_bar)(struct gve_priv *priv);
> @@ -863,6 +865,8 @@ struct gve_ctrl_ops {
> int (*reset_flow_rules)(struct gve_priv *priv);
> int (*create_queues)(struct gve_priv *priv);
> int (*destroy_queues)(struct gve_priv *priv);
> + int (*report_link_status)(struct gve_priv *priv);
> + int (*report_link_speed)(struct gve_priv *priv);
> };
>
> struct gve_priv {
> @@ -964,6 +968,7 @@ struct gve_priv {
>
> /* Gvnic device link speed from hypervisor. */
> u64 link_speed;
> + bool link_up;
> bool up_before_suspend; /* True if dev was up before suspend */
>
> bool mgmt_irq_requested;
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
> index 799381d24c71..b33633555b71 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.c
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.c
> @@ -1872,3 +1872,12 @@ int gve_adminq_create_queues(struct gve_priv *priv)
>
> return err;
> }
> +
> +int gve_adminq_report_link_status(struct gve_priv *priv)
> +{
> + u32 status;
> +
> + status = ioread32be(&priv->reg_bar0->device_status);
> + priv->link_up = !!(GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
> + return 0;
> +}
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
> index 03e8318f7327..82413369dec9 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.h
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.h
> @@ -643,6 +643,7 @@ int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len,
> dma_addr_t stats_report_addr, u64 interval);
> int gve_adminq_verify_driver_compatibility(struct gve_priv *priv);
> int gve_adminq_get_device_properties(struct gve_priv *priv);
> +int gve_adminq_report_link_status(struct gve_priv *priv);
> int gve_adminq_report_link_speed(struct gve_priv *priv);
> int gve_adminq_add_flow_rule(struct gve_priv *priv, struct gve_adminq_flow_rule *rule, u32 loc);
> int gve_adminq_del_flow_rule(struct gve_priv *priv, u32 loc);
> diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
> index 54b623b678ce..aaf5b8b1671b 100644
> --- a/drivers/net/ethernet/google/gve/gve_ethtool.c
> +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
> @@ -749,12 +749,11 @@ static int gve_get_link_ksettings(struct net_device *netdev,
> int err = 0;
>
> if (priv->link_speed == 0)
> - err = gve_adminq_report_link_speed(priv);
> + err = priv->ctrl_ops->report_link_speed(priv);
Sashiko says:
---
This is a pre-existing issue, but I have a question about the timeout handling
inside gve_adminq_report_link_speed().
If gve_adminq_execute_cmd() returns a timeout error, the function drops the
error and immediately frees the DMA memory via dma_free_coherent(). However,
since a host CPU timeout does not guarantee the NIC has aborted the DMA
operation, could the hardware eventually execute a DMA write to the provided
bus address after the memory has been reallocated, causing memory corruption?
This is another pre-existing issue, but could this ethtool call trigger a
use-after-free if a previous device reset failed?
If a device reset fails, gve_adminq_free() destroys the adminq_pool and frees
the DMA memory backing priv->adminq. If user-space subsequently issues an
ethtool command like gve_get_link_ksettings, the driver invokes
report_link_speed() and eventually gve_adminq_issue_cmd().
If the device's tail counter was reset to 0, and the host's head counter is
still 0, could gve_adminq_issue_cmd() write the command payload into the
now-dangling priv->adminq buffer?
---
I think that this is a larger-scale issue that affects all admin queue
commands that could time out. A solution here would be:
1) Submit a net fix to allocate from the DMA pool in gve_adminq_report_link_speed.
2) Follow up with a patch that invokes a reset on a timed-out AQ command.
As for failures that persist across a failed reset, addressing those
would require a much larger behavioral change in the driver, as the
driver assumes that if the actual hardware reset fails, it is an
“unrecoverable platform error” and makes no promises about future
driver behavior.
>
> cmd->base.speed = priv->link_speed;
>
> cmd->base.duplex = DUPLEX_FULL;
> -
> return err;
> }
>
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index dfc734d8bec9..181ec6919051 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -1475,15 +1475,15 @@ static int gve_close(struct net_device *dev)
> return err;
> }
>
> -static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
> +static void gve_handle_link_status(struct gve_priv *priv)
> {
> if (!gve_get_napi_enabled(priv))
> return;
>
> - if (link_status == netif_carrier_ok(priv->dev))
> + if (priv->link_up == netif_carrier_ok(priv->dev))
> return;
>
> - if (link_status) {
> + if (priv->link_up) {
Sashiko says:
Is there a time-of-check to time-of-use race here?
Because priv->link_up is read twice in quick succession without protection,
and could be modified concurrently by gve_adminq_report_link_status() running
in the lockless service_task workqueue, can the value change between the two
checks?
Would using READ_ONCE() and saving the result to a local variable prevent
unintended compiler optimizations and state changes?
Will fix in v3.
> netdev_info(priv->dev, "Device link is up.\n");
> netif_carrier_on(priv->dev);
> } else {
> @@ -1510,7 +1510,6 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
> {
> struct bpf_prog *old_prog;
> int err = 0;
> - u32 status;
>
> old_prog = READ_ONCE(priv->xdp_prog);
> if (!netif_running(priv->dev)) {
> @@ -1537,8 +1536,8 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
> bpf_prog_put(old_prog);
>
> out:
> - status = ioread32be(&priv->reg_bar0->device_status);
> - gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
> + priv->ctrl_ops->report_link_status(priv);
> + gve_handle_link_status(priv);
> return err;
> }
>
> @@ -1971,11 +1970,11 @@ static void gve_turnup(struct gve_priv *priv)
>
> static void gve_turnup_and_check_status(struct gve_priv *priv)
> {
> - u32 status;
> + const struct gve_ctrl_ops *ops = priv->ctrl_ops;
>
> gve_turnup(priv);
> - status = ioread32be(&priv->reg_bar0->device_status);
> - gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
> + ops->report_link_status(priv);
> + gve_handle_link_status(priv);
> }
>
> static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
> @@ -2299,12 +2298,14 @@ static void gve_service_task(struct work_struct *work)
> {
> struct gve_priv *priv = container_of(work, struct gve_priv,
> service_task);
> + const struct gve_ctrl_ops *ops = priv->ctrl_ops;
> u32 status = ioread32be(&priv->reg_bar0->device_status);
>
> gve_handle_status(priv, status);
>
> gve_handle_reset(priv);
> - gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
> + ops->report_link_status(priv);
> + gve_handle_link_status(priv);
> }
>
> static void gve_set_netdev_xdp_features(struct gve_priv *priv)
> @@ -2427,6 +2428,8 @@ static const struct gve_ctrl_ops gve_adminq_ops = {
> .teardown_mgmt_irq = gve_adminq_teardown_mgmt_irq,
> .create_queues = gve_adminq_create_queues,
> .destroy_queues = gve_adminq_destroy_queues,
> + .report_link_status = gve_adminq_report_link_status,
> + .report_link_speed = gve_adminq_report_link_speed,
> };
>
> static int gve_init_priv(struct gve_priv *priv)
> --
> 2.54.0.1013.g208068f2d8-goog
>
--
Joshua Washington | Software Engineer | joshwash@xxxxxxxxxx | (414) 366-4423