[PATCH 7/8] EDAC/{skx_common,i10nm}: Prepare RRL for sub-channel granularity
From: Qiuxu Zhuo
Date: Thu May 21 2026 - 03:40:54 EST
Prepare for enabling Diamond Rapids server RRL (Retry Read error Log),
which operates at sub-channel granularity, by converting struct
res_config::reg_rrl_ddr from a single pointer to an array (reg_rrl_ddr[2])
and updating all users in i10nm_edac and skx_common accordingly.
Initialize only reg_rrl_ddr[0] for existing platforms, and prepare for
supporting two groups of RRL sets per DDR channel (one per sub-channel)
when reg_rrl_ddr[1] is present.
Tested-by: Yi Lai <yi1.lai@xxxxxxxxx>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@xxxxxxxxx>
---
drivers/edac/i10nm_base.c | 12 ++++++------
drivers/edac/skx_common.c | 35 +++++++++++++++++++++++++++++------
drivers/edac/skx_common.h | 2 +-
3 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 2e21eefbb4f5..fa1853f252b0 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -767,7 +767,7 @@ static struct res_config i10nm_cfg0 = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
- .reg_rrl_ddr = &icx_reg_rrl_ddr,
+ .reg_rrl_ddr[0] = &icx_reg_rrl_ddr,
};
static struct res_config i10nm_cfg1 = {
@@ -785,7 +785,7 @@ static struct res_config i10nm_cfg1 = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x108,
- .reg_rrl_ddr = &icx_reg_rrl_ddr,
+ .reg_rrl_ddr[0] = &icx_reg_rrl_ddr,
};
static struct res_config spr_cfg = {
@@ -808,7 +808,7 @@ static struct res_config spr_cfg = {
.ddr_mdev_bdf = {0, 12, 0},
.hbm_mdev_bdf = {0, 12, 1},
.sad_all_offset = 0x300,
- .reg_rrl_ddr = &spr_reg_rrl_ddr,
+ .reg_rrl_ddr[0] = &spr_reg_rrl_ddr,
.reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0,
.reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1,
};
@@ -828,7 +828,7 @@ static struct res_config gnr_cfg = {
.uracu_bdf = {0, 0, 1},
.ddr_mdev_bdf = {0, 5, 1},
.sad_all_offset = 0x300,
- .reg_rrl_ddr = &gnr_reg_rrl_ddr,
+ .reg_rrl_ddr[0] = &gnr_reg_rrl_ddr,
};
static const struct x86_cpu_id i10nm_cpuids[] = {
@@ -1023,7 +1023,7 @@ static int __init i10nm_init(void)
skx_setup_debug("i10nm_test");
res_cfg->rrl_ctrl_mode = retry_rd_err_log;
- if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
+ if (retry_rd_err_log && res_cfg->reg_rrl_ddr[0]) {
skx_set_show_rrl(skx_show_rrl);
if (retry_rd_err_log == RRL_CTRL_LINUX)
skx_enable_rrl(true);
@@ -1045,7 +1045,7 @@ static void __exit i10nm_exit(void)
skx_set_decode(NULL);
- if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
+ if (retry_rd_err_log && res_cfg->reg_rrl_ddr[0]) {
if (retry_rd_err_log == RRL_CTRL_LINUX)
skx_enable_rrl(false);
skx_set_show_rrl(NULL);
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 4d832f02fb59..bfd0cd1689ed 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -173,15 +173,18 @@ static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
static void enable_rrls_ddr(struct skx_imc *imc, bool enable)
{
- struct reg_rrl *rrl_ddr = skx_res_cfg->reg_rrl_ddr;
+ struct reg_rrl **rrl_ddr = skx_res_cfg->reg_rrl_ddr;
int i, chan_num = skx_res_cfg->ddr_chan_num;
struct skx_channel *chan = imc->chan;
if (!imc->mbase)
return;
- for (i = 0; i < chan_num; i++)
- enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]);
+ for (i = 0; i < chan_num; i++) {
+ enable_rrls(imc, i, rrl_ddr[0], enable, chan[i].rrl_ctl[0]);
+ if (rrl_ddr[1])
+ enable_rrls(imc, i, rrl_ddr[1], enable, chan[i].rrl_ctl[1]);
+ }
}
static void enable_rrls_hbm(struct skx_imc *imc, bool enable)
@@ -218,10 +221,31 @@ void skx_enable_rrl(bool enable)
}
EXPORT_SYMBOL_GPL(skx_enable_rrl);
+static struct reg_rrl *get_rrl_reg(struct decoded_addr *res, struct res_config *cfg)
+{
+ struct skx_imc *imc = &res->dev->imc[res->imc];
+
+ /* HBM has two groups of RRL sets, one per pseudo-channel. */
+ if (imc->hbm_mc)
+ return cfg->reg_rrl_hbm[res->cs & 1];
+
+ /* One group of RRL sets per DDR channel. */
+ if (!cfg->reg_rrl_ddr[1])
+ return cfg->reg_rrl_ddr[0];
+
+ if (res->subch == -1) {
+ skx_printk(KERN_ERR, "Invalid sub-channel id (-1), possibly missing %s ADXL component.\n", component_names[INDEX_SUBCH]);
+ return NULL;
+ }
+
+ /* Two groups of RRL sets per DDR channel (e.g., DMR: one group per sub-channel). */
+ return cfg->reg_rrl_ddr[res->subch & 1];
+}
+
void skx_show_rrl(struct decoded_addr *res, char *msg, int len, bool scrub_err)
{
- int i, j, n, ch = res->channel, pch = res->cs & 1;
struct skx_imc *imc = &res->dev->imc[res->imc];
+ int i, j, n, ch = res->channel;
u64 log, corr, status_mask;
struct reg_rrl *rrl;
bool scrub;
@@ -231,8 +255,7 @@ void skx_show_rrl(struct decoded_addr *res, char *msg, int len, bool scrub_err)
if (!imc->mbase)
return;
- rrl = imc->hbm_mc ? skx_res_cfg->reg_rrl_hbm[pch] : skx_res_cfg->reg_rrl_ddr;
-
+ rrl = get_rrl_reg(res, skx_res_cfg);
if (!rrl)
return;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 38d96bf71fd9..6d4cf0dd412a 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -283,7 +283,7 @@ struct res_config {
int hbm_chan_mmio_sz;
bool support_ddr5;
/* RRL register sets per DDR channel */
- struct reg_rrl *reg_rrl_ddr;
+ struct reg_rrl *reg_rrl_ddr[2];
/* RRL register sets per HBM channel */
struct reg_rrl *reg_rrl_hbm[2];
/* RRL control mode */
--
2.43.0