Re: [PATCH V1 4/6] accel/amdxdna: Add AIE4 firmware loading

From: yidong Zhang

Date: Mon Mar 30 2026 - 16:31:20 EST



On 3/30/26 13:17, Mario Limonciello wrote:
>
>
> On 3/30/26 11:37, Lizhi Hou wrote:
>> From: David Zhang <yidong.zhang@xxxxxxx>
>>
>> Add support for loading AIE4 firmware through the common PSP
>> interfaces.
>>
>> Compared to AIE2, AIE4 introduces an additional CERT firmware image.
>> aiem_psp_create() performs CERT setup when the CERT image size is
>> non-zero.
>>
>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
>> Signed-off-by: David Zhang <yidong.zhang@xxxxxxx>
>> Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
>> ---
>>   drivers/accel/amdxdna/aie.h       |   4 +
>>   drivers/accel/amdxdna/aie2_pci.c  |   2 +
>>   drivers/accel/amdxdna/aie4_pci.c  | 109 ++++++++++++++++++++++-
>>   drivers/accel/amdxdna/aie4_pci.h  |   4 +
>>   drivers/accel/amdxdna/aie_psp.c   | 141 +++++++++++++++++++++++-------
>>   drivers/accel/amdxdna/npu3_regs.c |  23 +++++
>>   6 files changed, 247 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
>> index 124c0f7e9ca0..423ed34af9ee 100644
>> --- a/drivers/accel/amdxdna/aie.h
>> +++ b/drivers/accel/amdxdna/aie.h
>> @@ -57,7 +57,11 @@ struct aie_bar_off_pair {
>>   struct psp_config {
>>       const void        *fw_buf;
>>       u32            fw_size;
>> +    const void              *certfw_buf;
>> +    u32                     certfw_size;
>>       void __iomem        *psp_regs[PSP_MAX_REGS];
>> +    u32            arg2_mask;
>> +    u32            notify_val;
>>   };
>>     /* aie.c */
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
>> index e4b7893bd429..0489e668cd73 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -549,6 +549,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>         psp_conf.fw_size = fw->size;
>>       psp_conf.fw_buf = fw->data;
>> +    psp_conf.arg2_mask = GENMASK(23, 0);
>> +    psp_conf.notify_val = 1;
>>       for (i = 0; i < PSP_MAX_REGS; i++)
>>           psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
>>       ndev->aie.psp_hdl = aiem_psp_create(&xdna->ddev, &psp_conf);
>> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
>> index 0f360c1ccebd..e7993b315996 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.c
>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>> @@ -6,11 +6,15 @@
>>   #include <drm/amdxdna_accel.h>
>>   #include <drm/drm_managed.h>
>>   #include <drm/drm_print.h>
>> +#include <linux/firmware.h>
>> +#include <linux/sizes.h>
>>     #include "aie4_pci.h"
>>   #include "amdxdna_pci_drv.h"
>>   -#define NO_IOHUB    0
>> +#define NO_IOHUB        0
>> +#define CERTFW_MAX_SIZE         (SZ_32K + SZ_256)
>> +#define PSP_NOTIFY_INTR        0xD007BE11
>>     /*
>>    * The management mailbox channel is allocated by firmware.
>> @@ -207,13 +211,12 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
>>     static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
>>   {
>> -    /* TODO */
>> +    aie_psp_stop(ndev->aie.psp_hdl);
>>   }
>>     static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
>>   {
>> -    /* TODO */
>> -    return 0;
>> +    return aie_psp_start(ndev->aie.psp_hdl);
>>   }
>>     static int aie4_hw_start(struct amdxdna_dev *xdna)
>> @@ -261,11 +264,98 @@ static void aie4_hw_stop(struct amdxdna_dev *xdna)
>>       aie4_fw_unload(ndev);
>>   }
>>   +static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
>> +                 const struct firmware **npufw,
>> +                 const struct firmware **certfw)
>> +{
>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>> +    struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> +    char fw_name[128];
>> +    int ret;
>> +
>> +    ret = snprintf(fw_name, sizeof(fw_name), "amdnpu/%04x_%02x/%s",
>> +               pdev->device, pdev->revision, ndev->priv->npufw_path);
>> +    if (ret >= sizeof(fw_name)) {
>> +        XDNA_ERR(xdna, "npu firmware path is truncated");
>> +        return -EINVAL;
>> +    }
>> +
>> +    ret = request_firmware(npufw, fw_name, &pdev->dev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", fw_name, ret);
>> +        return ret;
>> +    }
>> +
>> +    ret = snprintf(fw_name, sizeof(fw_name), "amdnpu/%04x_%02x/%s",
>> +               pdev->device, pdev->revision, ndev->priv->certfw_path);
>> +    if (ret >= sizeof(fw_name)) {
>> +        XDNA_ERR(xdna, "cert firmware path is truncated");
>> +        ret = -EINVAL;
>> +        goto release_npufw;
>> +    }
>> +
>> +    ret = request_firmware(certfw, fw_name, &pdev->dev);
>> +    if (ret) {
>> +        XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", fw_name, ret);
>> +        goto release_npufw;
>> +    }
>> +
>> +    if ((*certfw)->size > CERTFW_MAX_SIZE) {
>> +        XDNA_ERR(xdna, "CERTFW over maximum size of 32 KB + 256 B");
>> +        ret = -EINVAL;
>> +        goto release_certfw;
>> +    }
>> +
>> +    return 0;
>> +
>> +release_certfw:
>> +    release_firmware(*certfw);
>> +release_npufw:
>> +    release_firmware(*npufw);
>> +
>> +    return ret;
>> +}
>> +
>> +static void aie4_release_firmware(struct amdxdna_dev_hdl *ndev,
>> +                  const struct firmware *npufw,
>> +                  const struct firmware *certfw)
>> +{
>> +    release_firmware(certfw);
>> +    release_firmware(npufw);
>> +}
>> +
>> +static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
>> +                 const struct firmware *npufw,
>> +                 const struct firmware *certfw,
>> +                 void __iomem *tbl[PCI_NUM_RESOURCES])
>> +{
>> +    struct amdxdna_dev *xdna = ndev->aie.xdna;
>> +    struct psp_config psp_conf;
>> +    int i;
>> +
>> +    psp_conf.fw_size = npufw->size;
>> +    psp_conf.fw_buf = npufw->data;
>> +    psp_conf.certfw_size = certfw->size;
>> +    psp_conf.certfw_buf = certfw->data;
>> +    psp_conf.arg2_mask = ~0;
>> +    psp_conf.notify_val = PSP_NOTIFY_INTR;
>> +    for (i = 0; i < PSP_MAX_REGS; i++)
>> +        psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
>> +    ndev->aie.psp_hdl = aiem_psp_create(&xdna->ddev, &psp_conf);
>> +    if (!ndev->aie.psp_hdl) {
>> +        XDNA_ERR(xdna, "failed to create psp");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>>   {
>>       struct amdxdna_dev *xdna = ndev->aie.xdna;
>>       struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>>       void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
>> +    const struct firmware *npufw, *certfw;
>>       unsigned long bars = 0;
>>       int ret, i;
>>   @@ -282,6 +372,8 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>>           return ret;
>>       }
>>   +    for (i = 0; i < PSP_MAX_REGS; i++)
>> +        set_bit(PSP_REG_BAR(ndev, i), &bars);
>>       set_bit(xdna->dev_info->mbox_bar, &bars);
>>       set_bit(xdna->dev_info->sram_bar, &bars);
>>   @@ -300,6 +392,15 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>>         pci_set_master(pdev);
>>   +    ret = aie4_request_firmware(ndev, &npufw, &certfw);
>> +    if (ret)
>> +        goto clear_master;
>> +
>> +    ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
>> +    aie4_release_firmware(ndev, npufw, certfw);
>> +    if (ret)
>> +        goto clear_master;
>> +
>>       ret = aie4_irq_init(xdna);
>>       if (ret)
>>           goto clear_master;
>> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
>> index f3810a969431..ee388ccf7196 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.h
>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>> @@ -14,9 +14,13 @@
>>   #include "amdxdna_mailbox.h"
>>     struct amdxdna_dev_priv {
>> +    const char              *npufw_path;
>> +    const char              *certfw_path;
>>       u32            mbox_bar;
>>       u32            mbox_rbuf_bar;
>>       u64            mbox_info_off;
>> +
>> +    struct aie_bar_off_pair    psp_regs_off[PSP_MAX_REGS];
>>   };
>>     struct amdxdna_dev_hdl {
>> diff --git a/drivers/accel/amdxdna/aie_psp.c b/drivers/accel/amdxdna/aie_psp.c
>> index 8743b812a449..458dca7cc5a0 100644
>> --- a/drivers/accel/amdxdna/aie_psp.c
>> +++ b/drivers/accel/amdxdna/aie_psp.c
>> @@ -18,6 +18,7 @@
>>   #define PSP_VALIDATE        1
>>   #define PSP_START        2
>>   #define PSP_RELEASE_TMR        3
>> +#define PSP_VALIDATE_CERT       4
>>     /* PSP special arguments */
>>   #define PSP_START_COPY_FW    1
>> @@ -27,10 +28,20 @@
>>   #define PSP_ERROR_BAD_STATE    0xFFFF0007
>>     #define PSP_FW_ALIGN        0x10000
>> +#define PSP_CFW_ALIGN           0x8000
>>   #define PSP_POLL_INTERVAL    20000    /* us */
>>   #define PSP_POLL_TIMEOUT    1000000    /* us */
>>   -#define PSP_REG(p, reg) ((p)->psp_regs[reg])
>> +#define PSP_REG(p, reg) ((p)->conf.psp_regs[reg])
>> +#define PSP_SET_CMD(psp, reg_vals, cmd, arg0, arg1, arg2)        \
>> +({                                    \
>> +    u32 *_regs = reg_vals;                        \
>> +    u32 _cmd = cmd;                            \
>> +    _regs[0] = _cmd;                        \
>> +    _regs[1] = arg0;                        \
>> +    _regs[2] = arg1;                        \
>> +    _regs[3] = ((arg2) | ((_cmd) << 24)) & (psp)->conf.arg2_mask;    \
>> +})
>>     struct psp_device {
>>       struct drm_device    *ddev;
>> @@ -38,7 +49,9 @@ struct psp_device {
>>       u32            fw_buf_sz;
>>       u64            fw_paddr;
>>       void            *fw_buffer;
>> -    void __iomem        *psp_regs[PSP_MAX_REGS];
>> +    u32                     certfw_buf_sz;
>> +    u64                     certfw_paddr;
>> +    void                    *certfw_buffer;
>>   };
>>     static int psp_exec(struct psp_device *psp, u32 *reg_vals)
>> @@ -47,13 +60,22 @@ static int psp_exec(struct psp_device *psp, u32 *reg_vals)
>>       int ret, i;
>>       u32 ready;
>>   +    /* Check for PSP ready before any write */
>> +    ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
>> +                 FIELD_GET(PSP_STATUS_READY, ready),
>> +                 PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
>> +    if (ret) {
>> +        drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret);
>> +        return ret;
>> +    }
>> +
>>       /* Write command and argument registers */
>>       for (i = 0; i < PSP_NUM_IN_REGS; i++)
>>           writel(reg_vals[i], PSP_REG(psp, i));
>>         /* clear and set PSP INTR register to kick off */
>>       writel(0, PSP_REG(psp, PSP_INTR_REG));
>> -    writel(1, PSP_REG(psp, PSP_INTR_REG));
>> +    writel(psp->conf.notify_val, PSP_REG(psp, PSP_INTR_REG));
>>         /* PSP should be busy. Wait for ready, so we know task is done. */
>>       ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
>> @@ -90,69 +112,124 @@ int aie_psp_waitmode_poll(struct psp_device *psp)
>>     void aie_psp_stop(struct psp_device *psp)
>>   {
>> -    u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
>> +    u32 reg_vals[PSP_NUM_IN_REGS];
>>       int ret;
>>   +    PSP_SET_CMD(psp, reg_vals, PSP_RELEASE_TMR, 0, 0, 0);
>> +
>>       ret = psp_exec(psp, reg_vals);
>>       if (ret)
>>           drm_err(psp->ddev, "release tmr failed, ret %d", ret);
>>   }
>>   -int aie_psp_start(struct psp_device *psp)
>> +static int psp_validate_fw(struct psp_device *psp, u8 cmd, u64 paddr, u32 buf_sz)
>>   {
>>       u32 reg_vals[PSP_NUM_IN_REGS];
>>       int ret;
>>   -    reg_vals[0] = PSP_VALIDATE;
>> -    reg_vals[1] = lower_32_bits(psp->fw_paddr);
>> -    reg_vals[2] = upper_32_bits(psp->fw_paddr);
>> -    reg_vals[3] = psp->fw_buf_sz;
>> +    PSP_SET_CMD(psp, reg_vals, cmd, lower_32_bits(paddr),
>> +            upper_32_bits(paddr), buf_sz);
>>         ret = psp_exec(psp, reg_vals);
>> -    if (ret) {
>> +    if (ret)
>>           drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
>> -        return ret;
>> -    }
>>   -    memset(reg_vals, 0, sizeof(reg_vals));
>> -    reg_vals[0] = PSP_START;
>> -    reg_vals[1] = PSP_START_COPY_FW;
>> +    return ret;
>> +}
>> +
>> +static int psp_start(struct psp_device *psp)
>> +{
>> +    u32 reg_vals[PSP_NUM_IN_REGS];
>> +    int ret;
>> +
>> +    PSP_SET_CMD(psp, reg_vals, PSP_START, PSP_START_COPY_FW, 0, 0);
>> +
>>       ret = psp_exec(psp, reg_vals);
>> -    if (ret) {
>> +    if (ret)
>>           drm_err(psp->ddev, "failed to start fw, ret %d", ret);
>> +
>> +    return ret;
>> +}
>> +
>> +int aie_psp_start(struct psp_device *psp)
>> +{
>> +    int ret;
>> +
>> +    ret = psp_validate_fw(psp, PSP_VALIDATE,
>> +                  psp->fw_paddr, psp->fw_buf_sz);
>> +    if (ret)
>>           return ret;
>> -    }
>>   -    return 0;
>> +    if (!psp->certfw_buf_sz)
>> +        goto psp_start;
>> +
>> +    ret = psp_validate_fw(psp, PSP_VALIDATE_CERT,
>> +                  psp->certfw_paddr, psp->certfw_buf_sz);
>> +    if (ret)
>> +        return ret;
>> +psp_start:
>> +    return psp_start(psp);
>> +}
>> +
>> +/*
>> + * PSP requires host physical address to load firmware.
>> + * Allocate a buffer, obtain its physical address, align, and copy data in.
>> + */
>> +static void *psp_alloc_fw_buf(struct psp_device *psp, const void *fw_data,
>> +                  u32 fw_size, u32 align, u32 *buf_sz,
>> +                  u64 *paddr)
>> +{
>> +    u32 alloc_sz;
>> +    void *buffer;
>> +    u64 offset;
>> +
>> +    *buf_sz = ALIGN(fw_size, align);
>> +    alloc_sz = *buf_sz + align;
>> +
>> +    buffer = drmm_kmalloc(psp->ddev, alloc_sz, GFP_KERNEL);
>> +    if (!buffer)
>> +        return NULL;
>> +
>> +    *paddr = virt_to_phys(buffer);
>> +    offset = ALIGN(*paddr, align) - *paddr;
>> +    *paddr += offset;
>> +    memcpy(buffer + offset, fw_data, fw_size);
>> +
>> +    return buffer;
>>   }
>>     struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf)
>>   {
>>       struct psp_device *psp;
>> -    u64 offset;
>>         psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
>>       if (!psp)
>>           return NULL;
>>         psp->ddev = ddev;
>> -    memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
>> +    psp->fw_buffer = psp_alloc_fw_buf(psp, conf->fw_buf, conf->fw_size,
>> +                      PSP_FW_ALIGN, &psp->fw_buf_sz,
>> +                      &psp->fw_paddr);
>> +    if (!psp->fw_buffer)
>> +        return NULL;
>> +
>> +    if (!conf->certfw_size) {
>> +        drm_dbg(ddev, "no cert fw");
>> +        goto done;
>> +    }
>>   -    psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
>> -    psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
>> -    if (!psp->fw_buffer) {
>> -        drm_err(ddev, "no memory for fw buffer");
>> +    /* CERT firmware */
>> +    psp->certfw_buffer = psp_alloc_fw_buf(psp, conf->certfw_buf,
>> +                          conf->certfw_size, PSP_CFW_ALIGN,
>> +                          &psp->certfw_buf_sz,
>> +                          &psp->certfw_paddr);
>> +    if (!psp->certfw_buffer) {
>> +        drm_err(ddev, "no memory for cert fw buffer");
>>           return NULL;
>>       }
>>   -    /*
>> -     * AMD Platform Security Processor(PSP) requires host physical
>> -     * address to load NPU firmware.
>> -     */
>> -    psp->fw_paddr = virt_to_phys(psp->fw_buffer);
>> -    offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
>> -    psp->fw_paddr += offset;
>> -    memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
>> +done:
>> +    memcpy(&psp->conf, conf, sizeof(psp->conf));
>>         return psp;
>>   }
>> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
>> index f6e20f4858db..fb2bd60b8f00 100644
>> --- a/drivers/accel/amdxdna/npu3_regs.c
>> +++ b/drivers/accel/amdxdna/npu3_regs.c
>> @@ -16,6 +16,15 @@
>>     /* PCIe BAR Index for NPU3 */
>>   #define NPU3_REG_BAR_INDEX    0
>> +#define NPU3_PSP_BAR_INDEX      4
>> +
>> +#define MMNPU_APERTURE3_BASE    0x3810000
>> +#define NPU3_PSP_BAR_BASE       MMNPU_APERTURE3_BASE
>> +
>> +#define MPASP_C2PMSG_123_ALT_1  0x3810AEC
>> +#define MPASP_C2PMSG_156_ALT_1  0x3810B70
>> +#define MPASP_C2PMSG_157_ALT_1  0x3810B74
>> +#define MPASP_C2PMSG_73_ALT_1   0x3810A24
>>     static const struct amdxdna_fw_feature_tbl npu3_fw_feature_table[] = {
>>       { .major = 5, .min_minor = 10 },
>> @@ -23,14 +32,28 @@ static const struct amdxdna_fw_feature_tbl npu3_fw_feature_table[] = {
>>   };
>>     static const struct amdxdna_dev_priv npu3_dev_priv = {
>> +    .npufw_path             = "npu.dev.sbin",
>> +    .certfw_path            = "cert.dev.sbin",
>>       .mbox_bar        = NPU3_MBOX_BAR,
>>       .mbox_rbuf_bar        = NPU3_MBOX_BUFFER_BAR,
>>       .mbox_info_off        = NPU3_MBOX_INFO_OFF,
>> +    .psp_regs_off   = {
>> +        DEFINE_BAR_OFFSET(PSP_CMD_REG,    NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>> +        DEFINE_BAR_OFFSET(PSP_ARG0_REG,   NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
>> +        DEFINE_BAR_OFFSET(PSP_ARG1_REG,   NPU3_PSP, MPASP_C2PMSG_157_ALT_1),
>> +        DEFINE_BAR_OFFSET(PSP_ARG2_REG,   NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>> +        DEFINE_BAR_OFFSET(PSP_INTR_REG,   NPU3_PSP, MPASP_C2PMSG_73_ALT_1),
>> +        DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>> +        DEFINE_BAR_OFFSET(PSP_RESP_REG,   NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
>> +        /* npu3 doesn't use 8th pwaitmode register */
>> +    },
>> +
>
> Spurious blank line added here; it then ping-pongs (gets removed and re-added) in the later patches of this series.


Thank you so much! I will fix this.

/David

>
>>   };
>>     const struct amdxdna_dev_info dev_npu3_pf_info = {
>>       .mbox_bar        = NPU3_MBOX_BAR,
>>       .sram_bar        = NPU3_MBOX_BUFFER_BAR,
>> +    .psp_bar                = NPU3_PSP_BAR_INDEX,
>>       .vbnv            = "RyzenAI-npu3-pf",
>>       .device_type        = AMDXDNA_DEV_TYPE_PF,
>>       .dev_priv        = &npu3_dev_priv,
>