[PATCH v3 net-next 3/3] amd-xgbe: add TX descriptor cleanup for link-down

From: Raju Rangoju

Date: Thu Mar 19 2026 - 12:47:24 EST


Add a TX descriptor cleanup mechanism to reclaim abandoned
descriptors when the physical link goes down.

When the link goes down while TX packets are in-flight, the hardware
stops processing descriptors with the OWN bit still set. The current
driver waits indefinitely for these descriptors to complete, which
never happens. This causes:

- TX ring exhaustion (no descriptors available for new packets)
- Memory leaks (skbs never freed)
- DMA mapping leaks (mappings never unmapped)
- Network stack backpressure buildup

Add force-cleanup mechanism in xgbe_tx_poll() that detects link-down
state and reclaims abandoned descriptors. The helper functions and DMA
optimizations support efficient TX shutdown:
- xgbe_wait_for_dma_tx_complete(): Wait for DMA completion with
link-down optimization
- Restructure xgbe_disable_tx() for proper shutdown sequence

Implementation:
1. Check link state at the start of tx_poll
2. If link is down, set force_cleanup flag
3. For descriptors that hardware hasn't completed (!tx_complete):
- If force_cleanup: treat as completed and reclaim resources
- If link up: break and wait for hardware (normal behavior)

The cleanup process:
- Frees skbs that will never be transmitted
- Unmaps DMA mappings
- Resets descriptors for reuse
- Does NOT count as successful transmission (correct statistics)

Benefits:
- Prevents TX ring starvation
- Eliminates memory and DMA mapping leaks
- Enables fast link recovery when link comes back up
- Critical for link aggregation failover scenarios

Signed-off-by: Raju Rangoju <Raju.Rangoju@xxxxxxx>
---
Changes since v2:
- remove the stale function xgbe_reset_tx_queues(); otherwise it would
result in an undefined-symbol error during linking.

drivers/net/ethernet/amd/xgbe/xgbe-common.h | 4 ++
drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 77 ++++++++++++++++++---
drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 43 ++++++++++--
3 files changed, 108 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index c17900a49595..66807d67e984 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -330,6 +330,10 @@
#define MAC_ISR_SMI_WIDTH 1
#define MAC_ISR_TSIS_INDEX 12
#define MAC_ISR_TSIS_WIDTH 1
+#define MAC_ISR_LS_INDEX 24
+#define MAC_ISR_LS_WIDTH 2
+#define MAC_ISR_LSI_INDEX 0
+#define MAC_ISR_LSI_WIDTH 1
#define MAC_MACA1HR_AE_INDEX 31
#define MAC_MACA1HR_AE_WIDTH 1
#define MAC_MDIOIER_SNGLCOMPIE_INDEX 12
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index b7bf74c6bb47..2de974213090 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -3276,28 +3276,83 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1);
}

-static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
+/**
+ * xgbe_wait_for_dma_tx_complete - Wait for DMA to complete pending TX
+ * @pdata: driver private data
+ *
+ * Wait for the DMA TX channels to complete all pending descriptors.
+ * This ensures no frames are in-flight before we disable the transmitter.
+ * If link is down, return immediately as TX will never complete.
+ *
+ * Return: 0 on success, -ETIMEDOUT on timeout
+ */
+static int xgbe_wait_for_dma_tx_complete(struct xgbe_prv_data *pdata)
{
+ struct xgbe_channel *channel;
+ struct xgbe_ring *ring;
+ unsigned long timeout;
unsigned int i;
+ bool complete;

- /* Prepare for Tx DMA channel stop */
- for (i = 0; i < pdata->tx_q_count; i++)
- xgbe_prepare_tx_stop(pdata, i);
+ /* If link is down, TX will never complete - skip waiting */
+ if (!pdata->phy.link)
+ return 0;

- /* Disable MAC Tx */
- XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
+ timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ);

- /* Disable each Tx queue */
- for (i = 0; i < pdata->tx_q_count; i++)
- XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
+ do {
+ complete = true;

- /* Disable each Tx DMA channel */
+ for (i = 0; i < pdata->channel_count; i++) {
+ channel = pdata->channel[i];
+ ring = channel->tx_ring;
+ if (!ring)
+ continue;
+
+ /* Check if DMA has processed all descriptors */
+ if (ring->dirty != ring->cur) {
+ complete = false;
+ break;
+ }
+ }
+
+ if (complete)
+ return 0;
+
+ usleep_range(100, 200);
+ } while (time_before(jiffies, timeout));
+
+ netif_warn(pdata, drv, pdata->netdev,
+ "timeout waiting for DMA TX to complete\n");
+ return -ETIMEDOUT;
+}
+
+static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
+{
+ unsigned int i;
+
+ /* Step 1: Wait for DMA to complete pending descriptors */
+ xgbe_wait_for_dma_tx_complete(pdata);
+
+ /* Step 2: Disable each Tx DMA channel to stop
+ * processing new descriptors
+ */
for (i = 0; i < pdata->channel_count; i++) {
if (!pdata->channel[i]->tx_ring)
break;
-
XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
}
+
+ /* Step 3: Wait for MTL TX queues to drain */
+ for (i = 0; i < pdata->tx_q_count; i++)
+ xgbe_prepare_tx_stop(pdata, i);
+
+ /* Step 4: Disable MTL TX queues */
+ for (i = 0; i < pdata->tx_q_count; i++)
+ XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
+
+ /* Step 5: Disable MAC TX last */
+ XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
}

static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata,
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 6886d3b33ffe..2d6d00e3689b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -2169,6 +2169,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
struct net_device *netdev = pdata->netdev;
struct netdev_queue *txq;
int processed = 0;
+ bool force_cleanup;
unsigned int tx_packets = 0, tx_bytes = 0;
unsigned int cur;

@@ -2185,13 +2186,41 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)

txq = netdev_get_tx_queue(netdev, channel->queue_index);

+ /* Reclaim stuck TX descriptors while the link is down.
+ *
+ * When the link is down, the hardware stops processing TX descriptors
+ * (the OWN bit remains set). Force cleanup to reclaim these abandoned
+ * descriptors and keep the TX queue healthy.
+ *
+ * This cleanup provides:
+ * - TX queue availability for new packets when the link recovers
+ * - Clean resource management (skbs, DMA mappings, descriptors)
+ * - Fast failover in link aggregation scenarios
+ */
+ force_cleanup = !pdata->phy.link;
+
while ((processed < XGBE_TX_DESC_MAX_PROC) &&
(ring->dirty != cur)) {
rdata = XGBE_GET_DESC_DATA(ring, ring->dirty);
rdesc = rdata->rdesc;

- if (!hw_if->tx_complete(rdesc))
- break;
+ if (!hw_if->tx_complete(rdesc)) {
+ if (!force_cleanup)
+ break;
+ /* The hardware has abandoned this descriptor: the
+ * link went down before it was processed and the
+ * OWN bit will never be cleared.
+ *
+ * Free the skb and reclaim the descriptor so it
+ * can be reused once the link recovers.
+ *
+ * These are not counted as successful transmissions
+ * since the packets never reached the wire.
+ */
+ netif_dbg(pdata, tx_err, netdev,
+ "force-freeing stuck TX desc %u (link down)\n",
+ ring->dirty);
+ }

/* Make sure descriptor fields are read after reading the OWN
* bit */
@@ -2200,9 +2229,13 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
if (netif_msg_tx_done(pdata))
xgbe_dump_tx_desc(pdata, ring, ring->dirty, 1, 0);

- if (hw_if->is_last_desc(rdesc)) {
- tx_packets += rdata->tx.packets;
- tx_bytes += rdata->tx.bytes;
+ /* Only count packets actually transmitted (not force-cleaned)
+ */
+ if (!force_cleanup && hw_if->is_last_desc(rdesc)) {
+ if (hw_if->is_last_desc(rdesc)) {
+ tx_packets += rdata->tx.packets;
+ tx_bytes += rdata->tx.bytes;
+ }
}

/* Free the SKB and reset the descriptor for re-use */
--
2.34.1