Re: [PATCH V1 4/6] accel/amdxdna: Add AIE4 firmware loading
From: yidong Zhang
Date: Mon Mar 30 2026 - 16:31:20 EST
On 3/30/26 13:17, Mario Limonciello wrote:
>
>
> On 3/30/26 11:37, Lizhi Hou wrote:
>> From: David Zhang <yidong.zhang@xxxxxxx>
>>
>> Add support for loading AIE4 firmware through the common PSP
>> interfaces.
>>
>> Compared to AIE2, AIE4 introduces an additional CERT firmware image.
>> aiem_psp_create() performs CERT setup when the CERT image size is
>> non-zero.
>>
>> Co-developed-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
>> Signed-off-by: Hayden Laccabue <Hayden.Laccabue@xxxxxxx>
>> Signed-off-by: David Zhang <yidong.zhang@xxxxxxx>
>> Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
>> ---
>> drivers/accel/amdxdna/aie.h | 4 +
>> drivers/accel/amdxdna/aie2_pci.c | 2 +
>> drivers/accel/amdxdna/aie4_pci.c | 109 ++++++++++++++++++++++-
>> drivers/accel/amdxdna/aie4_pci.h | 4 +
>> drivers/accel/amdxdna/aie_psp.c | 141 +++++++++++++++++++++++-------
>> drivers/accel/amdxdna/npu3_regs.c | 23 +++++
>> 6 files changed, 247 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie.h b/drivers/accel/amdxdna/aie.h
>> index 124c0f7e9ca0..423ed34af9ee 100644
>> --- a/drivers/accel/amdxdna/aie.h
>> +++ b/drivers/accel/amdxdna/aie.h
>> @@ -57,7 +57,11 @@ struct aie_bar_off_pair {
>> struct psp_config {
>> const void *fw_buf;
>> u32 fw_size;
>> + const void *certfw_buf;
>> + u32 certfw_size;
>> void __iomem *psp_regs[PSP_MAX_REGS];
>> + u32 arg2_mask;
>> + u32 notify_val;
>> };
>> /* aie.c */
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
>> index e4b7893bd429..0489e668cd73 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -549,6 +549,8 @@ static int aie2_init(struct amdxdna_dev *xdna)
>> psp_conf.fw_size = fw->size;
>> psp_conf.fw_buf = fw->data;
>> + psp_conf.arg2_mask = GENMASK(23, 0);
>> + psp_conf.notify_val = 1;
>> for (i = 0; i < PSP_MAX_REGS; i++)
>> psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
>> ndev->aie.psp_hdl = aiem_psp_create(&xdna->ddev, &psp_conf);
>> diff --git a/drivers/accel/amdxdna/aie4_pci.c b/drivers/accel/amdxdna/aie4_pci.c
>> index 0f360c1ccebd..e7993b315996 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.c
>> +++ b/drivers/accel/amdxdna/aie4_pci.c
>> @@ -6,11 +6,15 @@
>> #include <drm/amdxdna_accel.h>
>> #include <drm/drm_managed.h>
>> #include <drm/drm_print.h>
>> +#include <linux/firmware.h>
>> +#include <linux/sizes.h>
>> #include "aie4_pci.h"
>> #include "amdxdna_pci_drv.h"
>> -#define NO_IOHUB 0
>> +#define NO_IOHUB 0
>> +#define CERTFW_MAX_SIZE (SZ_32K + SZ_256)
>> +#define PSP_NOTIFY_INTR 0xD007BE11
>> /*
>> * The management mailbox channel is allocated by firmware.
>> @@ -207,13 +211,12 @@ static int aie4_mailbox_init(struct amdxdna_dev *xdna)
>> static void aie4_fw_unload(struct amdxdna_dev_hdl *ndev)
>> {
>> - /* TODO */
>> + aie_psp_stop(ndev->aie.psp_hdl);
>> }
>> static int aie4_fw_load(struct amdxdna_dev_hdl *ndev)
>> {
>> - /* TODO */
>> - return 0;
>> + return aie_psp_start(ndev->aie.psp_hdl);
>> }
>> static int aie4_hw_start(struct amdxdna_dev *xdna)
>> @@ -261,11 +264,98 @@ static void aie4_hw_stop(struct amdxdna_dev *xdna)
>> aie4_fw_unload(ndev);
>> }
>> +static int aie4_request_firmware(struct amdxdna_dev_hdl *ndev,
>> + const struct firmware **npufw,
>> + const struct firmware **certfw)
>> +{
>> + struct amdxdna_dev *xdna = ndev->aie.xdna;
>> + struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> + char fw_name[128];
>> + int ret;
>> +
>> + ret = snprintf(fw_name, sizeof(fw_name), "amdnpu/%04x_%02x/%s",
>> + pdev->device, pdev->revision, ndev->priv->npufw_path);
>> + if (ret >= sizeof(fw_name)) {
>> + XDNA_ERR(xdna, "npu firmware path is truncated");
>> + return -EINVAL;
>> + }
>> +
>> + ret = request_firmware(npufw, fw_name, &pdev->dev);
>> + if (ret) {
>> + XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", fw_name, ret);
>> + return ret;
>> + }
>> +
>> + ret = snprintf(fw_name, sizeof(fw_name), "amdnpu/%04x_%02x/%s",
>> + pdev->device, pdev->revision, ndev->priv->certfw_path);
>> + if (ret >= sizeof(fw_name)) {
>> + XDNA_ERR(xdna, "cert firmware path is truncated");
>> + ret = -EINVAL;
>> + goto release_npufw;
>> + }
>> +
>> + ret = request_firmware(certfw, fw_name, &pdev->dev);
>> + if (ret) {
>> + XDNA_ERR(xdna, "failed to request_firmware %s, ret %d", fw_name, ret);
>> + goto release_npufw;
>> + }
>> +
>> + if ((*certfw)->size > CERTFW_MAX_SIZE) {
>> + XDNA_ERR(xdna, "CERTFW over maximum size of 32 KB + 256 B");
>> + ret = -EINVAL;
>> + goto release_certfw;
>> + }
>> +
>> + return 0;
>> +
>> +release_certfw:
>> + release_firmware(*certfw);
>> +release_npufw:
>> + release_firmware(*npufw);
>> +
>> + return ret;
>> +}
>> +
>> +static void aie4_release_firmware(struct amdxdna_dev_hdl *ndev,
>> + const struct firmware *npufw,
>> + const struct firmware *certfw)
>> +{
>> + release_firmware(certfw);
>> + release_firmware(npufw);
>> +}
>> +
>> +static int aie4_prepare_firmware(struct amdxdna_dev_hdl *ndev,
>> + const struct firmware *npufw,
>> + const struct firmware *certfw,
>> + void __iomem *tbl[PCI_NUM_RESOURCES])
>> +{
>> + struct amdxdna_dev *xdna = ndev->aie.xdna;
>> + struct psp_config psp_conf;
>> + int i;
>> +
>> + psp_conf.fw_size = npufw->size;
>> + psp_conf.fw_buf = npufw->data;
>> + psp_conf.certfw_size = certfw->size;
>> + psp_conf.certfw_buf = certfw->data;
>> + psp_conf.arg2_mask = ~0;
>> + psp_conf.notify_val = PSP_NOTIFY_INTR;
>> + for (i = 0; i < PSP_MAX_REGS; i++)
>> + psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
>> + ndev->aie.psp_hdl = aiem_psp_create(&xdna->ddev, &psp_conf);
>> + if (!ndev->aie.psp_hdl) {
>> + XDNA_ERR(xdna, "failed to create psp");
>> + return -ENOMEM;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>> {
>> struct amdxdna_dev *xdna = ndev->aie.xdna;
>> struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
>> + const struct firmware *npufw, *certfw;
>> unsigned long bars = 0;
>> int ret, i;
>> @@ -282,6 +372,8 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>> return ret;
>> }
>> + for (i = 0; i < PSP_MAX_REGS; i++)
>> + set_bit(PSP_REG_BAR(ndev, i), &bars);
>> set_bit(xdna->dev_info->mbox_bar, &bars);
>> set_bit(xdna->dev_info->sram_bar, &bars);
>> @@ -300,6 +392,15 @@ static int aie4_pcidev_init(struct amdxdna_dev_hdl *ndev)
>> pci_set_master(pdev);
>> + ret = aie4_request_firmware(ndev, &npufw, &certfw);
>> + if (ret)
>> + goto clear_master;
>> +
>> + ret = aie4_prepare_firmware(ndev, npufw, certfw, tbl);
>> + aie4_release_firmware(ndev, npufw, certfw);
>> + if (ret)
>> + goto clear_master;
>> +
>> ret = aie4_irq_init(xdna);
>> if (ret)
>> goto clear_master;
>> diff --git a/drivers/accel/amdxdna/aie4_pci.h b/drivers/accel/amdxdna/aie4_pci.h
>> index f3810a969431..ee388ccf7196 100644
>> --- a/drivers/accel/amdxdna/aie4_pci.h
>> +++ b/drivers/accel/amdxdna/aie4_pci.h
>> @@ -14,9 +14,13 @@
>> #include "amdxdna_mailbox.h"
>> struct amdxdna_dev_priv {
>> + const char *npufw_path;
>> + const char *certfw_path;
>> u32 mbox_bar;
>> u32 mbox_rbuf_bar;
>> u64 mbox_info_off;
>> +
>> + struct aie_bar_off_pair psp_regs_off[PSP_MAX_REGS];
>> };
>> struct amdxdna_dev_hdl {
>> diff --git a/drivers/accel/amdxdna/aie_psp.c b/drivers/accel/amdxdna/aie_psp.c
>> index 8743b812a449..458dca7cc5a0 100644
>> --- a/drivers/accel/amdxdna/aie_psp.c
>> +++ b/drivers/accel/amdxdna/aie_psp.c
>> @@ -18,6 +18,7 @@
>> #define PSP_VALIDATE 1
>> #define PSP_START 2
>> #define PSP_RELEASE_TMR 3
>> +#define PSP_VALIDATE_CERT 4
>> /* PSP special arguments */
>> #define PSP_START_COPY_FW 1
>> @@ -27,10 +28,20 @@
>> #define PSP_ERROR_BAD_STATE 0xFFFF0007
>> #define PSP_FW_ALIGN 0x10000
>> +#define PSP_CFW_ALIGN 0x8000
>> #define PSP_POLL_INTERVAL 20000 /* us */
>> #define PSP_POLL_TIMEOUT 1000000 /* us */
>> -#define PSP_REG(p, reg) ((p)->psp_regs[reg])
>> +#define PSP_REG(p, reg) ((p)->conf.psp_regs[reg])
>> +#define PSP_SET_CMD(psp, reg_vals, cmd, arg0, arg1, arg2) \
>> +({ \
>> + u32 *_regs = reg_vals; \
>> + u32 _cmd = cmd; \
>> + _regs[0] = _cmd; \
>> + _regs[1] = arg0; \
>> + _regs[2] = arg1; \
>> + _regs[3] = ((arg2) | ((_cmd) << 24)) & (psp)->conf.arg2_mask; \
>> +})
>> struct psp_device {
>> struct drm_device *ddev;
>> @@ -38,7 +49,9 @@ struct psp_device {
>> u32 fw_buf_sz;
>> u64 fw_paddr;
>> void *fw_buffer;
>> - void __iomem *psp_regs[PSP_MAX_REGS];
>> + u32 certfw_buf_sz;
>> + u64 certfw_paddr;
>> + void *certfw_buffer;
>> };
>> static int psp_exec(struct psp_device *psp, u32 *reg_vals)
>> @@ -47,13 +60,22 @@ static int psp_exec(struct psp_device *psp, u32 *reg_vals)
>> int ret, i;
>> u32 ready;
>> + /* Check for PSP ready before any write */
>> + ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
>> + FIELD_GET(PSP_STATUS_READY, ready),
>> + PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
>> + if (ret) {
>> + drm_err(psp->ddev, "PSP is not ready, ret 0x%x", ret);
>> + return ret;
>> + }
>> +
>> /* Write command and argument registers */
>> for (i = 0; i < PSP_NUM_IN_REGS; i++)
>> writel(reg_vals[i], PSP_REG(psp, i));
>> /* clear and set PSP INTR register to kick off */
>> writel(0, PSP_REG(psp, PSP_INTR_REG));
>> - writel(1, PSP_REG(psp, PSP_INTR_REG));
>> + writel(psp->conf.notify_val, PSP_REG(psp, PSP_INTR_REG));
>> /* PSP should be busy. Wait for ready, so we know task is done. */
>> ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
>> @@ -90,69 +112,124 @@ int aie_psp_waitmode_poll(struct psp_device *psp)
>> void aie_psp_stop(struct psp_device *psp)
>> {
>> - u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
>> + u32 reg_vals[PSP_NUM_IN_REGS];
>> int ret;
>> + PSP_SET_CMD(psp, reg_vals, PSP_RELEASE_TMR, 0, 0, 0);
>> +
>> ret = psp_exec(psp, reg_vals);
>> if (ret)
>> drm_err(psp->ddev, "release tmr failed, ret %d", ret);
>> }
>> -int aie_psp_start(struct psp_device *psp)
>> +static int psp_validate_fw(struct psp_device *psp, u8 cmd, u64 paddr, u32 buf_sz)
>> {
>> u32 reg_vals[PSP_NUM_IN_REGS];
>> int ret;
>> - reg_vals[0] = PSP_VALIDATE;
>> - reg_vals[1] = lower_32_bits(psp->fw_paddr);
>> - reg_vals[2] = upper_32_bits(psp->fw_paddr);
>> - reg_vals[3] = psp->fw_buf_sz;
>> + PSP_SET_CMD(psp, reg_vals, cmd, lower_32_bits(paddr),
>> + upper_32_bits(paddr), buf_sz);
>> ret = psp_exec(psp, reg_vals);
>> - if (ret) {
>> + if (ret)
>> drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
>> - return ret;
>> - }
>> - memset(reg_vals, 0, sizeof(reg_vals));
>> - reg_vals[0] = PSP_START;
>> - reg_vals[1] = PSP_START_COPY_FW;
>> + return ret;
>> +}
>> +
>> +static int psp_start(struct psp_device *psp)
>> +{
>> + u32 reg_vals[PSP_NUM_IN_REGS];
>> + int ret;
>> +
>> + PSP_SET_CMD(psp, reg_vals, PSP_START, PSP_START_COPY_FW, 0, 0);
>> +
>> ret = psp_exec(psp, reg_vals);
>> - if (ret) {
>> + if (ret)
>> drm_err(psp->ddev, "failed to start fw, ret %d", ret);
>> +
>> + return ret;
>> +}
>> +
>> +int aie_psp_start(struct psp_device *psp)
>> +{
>> + int ret;
>> +
>> + ret = psp_validate_fw(psp, PSP_VALIDATE,
>> + psp->fw_paddr, psp->fw_buf_sz);
>> + if (ret)
>> return ret;
>> - }
>> - return 0;
>> + if (!psp->certfw_buf_sz)
>> + goto psp_start;
>> +
>> + ret = psp_validate_fw(psp, PSP_VALIDATE_CERT,
>> + psp->certfw_paddr, psp->certfw_buf_sz);
>> + if (ret)
>> + return ret;
>> +psp_start:
>> + return psp_start(psp);
>> +}
>> +
>> +/*
>> + * PSP requires host physical address to load firmware.
>> + * Allocate a buffer, obtain its physical address, align, and copy data in.
>> + */
>> +static void *psp_alloc_fw_buf(struct psp_device *psp, const void *fw_data,
>> + u32 fw_size, u32 align, u32 *buf_sz,
>> + u64 *paddr)
>> +{
>> + u32 alloc_sz;
>> + void *buffer;
>> + u64 offset;
>> +
>> + *buf_sz = ALIGN(fw_size, align);
>> + alloc_sz = *buf_sz + align;
>> +
>> + buffer = drmm_kmalloc(psp->ddev, alloc_sz, GFP_KERNEL);
>> + if (!buffer)
>> + return NULL;
>> +
>> + *paddr = virt_to_phys(buffer);
>> + offset = ALIGN(*paddr, align) - *paddr;
>> + *paddr += offset;
>> + memcpy(buffer + offset, fw_data, fw_size);
>> +
>> + return buffer;
>> }
>> struct psp_device *aiem_psp_create(struct drm_device *ddev, struct psp_config *conf)
>> {
>> struct psp_device *psp;
>> - u64 offset;
>> psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
>> if (!psp)
>> return NULL;
>> psp->ddev = ddev;
>> - memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
>> + psp->fw_buffer = psp_alloc_fw_buf(psp, conf->fw_buf, conf->fw_size,
>> + PSP_FW_ALIGN, &psp->fw_buf_sz,
>> + &psp->fw_paddr);
>> + if (!psp->fw_buffer)
>> + return NULL;
>> +
>> + if (!conf->certfw_size) {
>> + drm_dbg(ddev, "no cert fw");
>> + goto done;
>> + }
>> - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN);
>> - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL);
>> - if (!psp->fw_buffer) {
>> - drm_err(ddev, "no memory for fw buffer");
>> + /* CERT firmware */
>> + psp->certfw_buffer = psp_alloc_fw_buf(psp, conf->certfw_buf,
>> + conf->certfw_size, PSP_CFW_ALIGN,
>> + &psp->certfw_buf_sz,
>> + &psp->certfw_paddr);
>> + if (!psp->certfw_buffer) {
>> + drm_err(ddev, "no memory for cert fw buffer");
>> return NULL;
>> }
>> - /*
>> - * AMD Platform Security Processor(PSP) requires host physical
>> - * address to load NPU firmware.
>> - */
>> - psp->fw_paddr = virt_to_phys(psp->fw_buffer);
>> - offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
>> - psp->fw_paddr += offset;
>> - memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
>> +done:
>> + memcpy(&psp->conf, conf, sizeof(psp->conf));
>> return psp;
>> }
>> diff --git a/drivers/accel/amdxdna/npu3_regs.c b/drivers/accel/amdxdna/npu3_regs.c
>> index f6e20f4858db..fb2bd60b8f00 100644
>> --- a/drivers/accel/amdxdna/npu3_regs.c
>> +++ b/drivers/accel/amdxdna/npu3_regs.c
>> @@ -16,6 +16,15 @@
>> /* PCIe BAR Index for NPU3 */
>> #define NPU3_REG_BAR_INDEX 0
>> +#define NPU3_PSP_BAR_INDEX 4
>> +
>> +#define MMNPU_APERTURE3_BASE 0x3810000
>> +#define NPU3_PSP_BAR_BASE MMNPU_APERTURE3_BASE
>> +
>> +#define MPASP_C2PMSG_123_ALT_1 0x3810AEC
>> +#define MPASP_C2PMSG_156_ALT_1 0x3810B70
>> +#define MPASP_C2PMSG_157_ALT_1 0x3810B74
>> +#define MPASP_C2PMSG_73_ALT_1 0x3810A24
>> static const struct amdxdna_fw_feature_tbl npu3_fw_feature_table[] = {
>> { .major = 5, .min_minor = 10 },
>> @@ -23,14 +32,28 @@ static const struct amdxdna_fw_feature_tbl npu3_fw_feature_table[] = {
>> };
>> static const struct amdxdna_dev_priv npu3_dev_priv = {
>> + .npufw_path = "npu.dev.sbin",
>> + .certfw_path = "cert.dev.sbin",
>> .mbox_bar = NPU3_MBOX_BAR,
>> .mbox_rbuf_bar = NPU3_MBOX_BUFFER_BAR,
>> .mbox_info_off = NPU3_MBOX_INFO_OFF,
>> + .psp_regs_off = {
>> + DEFINE_BAR_OFFSET(PSP_CMD_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>> + DEFINE_BAR_OFFSET(PSP_ARG0_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
>> + DEFINE_BAR_OFFSET(PSP_ARG1_REG, NPU3_PSP, MPASP_C2PMSG_157_ALT_1),
>> + DEFINE_BAR_OFFSET(PSP_ARG2_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>> + DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU3_PSP, MPASP_C2PMSG_73_ALT_1),
>> + DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU3_PSP, MPASP_C2PMSG_123_ALT_1),
>> + DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU3_PSP, MPASP_C2PMSG_156_ALT_1),
>> + /* npu3 doesn't use 8th pwaitmode register */
>> + },
>> +
>
> Spurious blank line here; it is added in this patch and then changed back and forth (ping-ponged) in the later patches.
Thank you so much! I will fix this.
/David
>
>> };
>> const struct amdxdna_dev_info dev_npu3_pf_info = {
>> .mbox_bar = NPU3_MBOX_BAR,
>> .sram_bar = NPU3_MBOX_BUFFER_BAR,
>> + .psp_bar = NPU3_PSP_BAR_INDEX,
>> .vbnv = "RyzenAI-npu3-pf",
>> .device_type = AMDXDNA_DEV_TYPE_PF,
>> .dev_priv = &npu3_dev_priv,
>