Re: [patch 08/10] rseq: Implement read only ABI enforcement for optimized RSEQ V2 mode
From: Dmitry Vyukov
Date: Wed Apr 29 2026 - 05:37:14 EST
On Wed, 29 Apr 2026 at 01:34, Thomas Gleixner <tglx@xxxxxxxxxx> wrote:
>
> The optimized RSEQ V2 mode requires that user space adheres to the ABI
> specification and does not modify the read-only fields cpu_id_start,
> cpu_id, node_id and mm_cid behind the kernel's back.
>
> While the kernel does not rely on these fields, adherence to this rule is a
> fundamental prerequisite to allow multiple entities, e.g. libraries, in an
> application to utilize the full potential of RSEQ without stepping on each
> other's toes.
>
> Validate this adherence on every update of these fields. If the kernel
> detects that user space modified the fields, the application is forcibly
> terminated.
>
> Fixes: d6200245c75e ("rseq: Allow registering RSEQ with slice extension")
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
Reviewed-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
> ---
> include/linux/rseq_entry.h | 71 +++++++++++++++++----------------------------
> 1 file changed, 28 insertions(+), 43 deletions(-)
>
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -248,7 +248,6 @@ static __always_inline bool rseq_grant_s
> #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
>
> bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
> -bool rseq_debug_validate_ids(struct task_struct *t);
>
> static __always_inline void rseq_note_user_irq_entry(void)
> {
> @@ -368,43 +367,6 @@ bool rseq_debug_update_user_cs(struct ta
> return false;
> }
>
> -/*
> - * On debug kernels validate that user space did not mess with it if the
> - * debug branch is enabled.
> - */
> -bool rseq_debug_validate_ids(struct task_struct *t)
> -{
> - struct rseq __user *rseq = t->rseq.usrptr;
> - u32 cpu_id, uval, node_id;
> -
> - /*
> - * On the first exit after registering the rseq region CPU ID is
> - * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0!
> - */
> - node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ?
> - cpu_to_node(t->rseq.ids.cpu_id) : 0;
> -
> - scoped_user_read_access(rseq, efault) {
> - unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
> - if (cpu_id != t->rseq.ids.cpu_id)
> - goto die;
> - unsafe_get_user(uval, &rseq->cpu_id, efault);
> - if (uval != cpu_id)
> - goto die;
> - unsafe_get_user(uval, &rseq->node_id, efault);
> - if (uval != node_id)
> - goto die;
> - unsafe_get_user(uval, &rseq->mm_cid, efault);
> - if (uval != t->rseq.ids.mm_cid)
> - goto die;
> - }
> - return true;
> -die:
> - t->rseq.event.fatal = true;
> -efault:
> - return false;
> -}
> -
> #endif /* RSEQ_BUILD_SLOW_PATH */
>
> /*
> @@ -519,12 +481,32 @@ bool rseq_set_ids_get_csaddr(struct task
> {
> struct rseq __user *rseq = t->rseq.usrptr;
>
> - if (static_branch_unlikely(&rseq_debug_enabled)) {
> - if (!rseq_debug_validate_ids(t))
> - return false;
> - }
> -
> scoped_user_rw_access(rseq, efault) {
> + /* Validate the R/O fields for debug and optimized mode */
> + if (static_branch_unlikely(&rseq_debug_enabled) || rseq_v2(t)) {
> + u32 cpu_id, uval, node_id;
> +
> + /*
> + * On the first exit after registering the rseq region CPU ID is
> + * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0!
> + */
> + node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ?
> + cpu_to_node(t->rseq.ids.cpu_id) : 0;
> +
> + unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
> + if (cpu_id != t->rseq.ids.cpu_id)
> + goto die;
> + unsafe_get_user(uval, &rseq->cpu_id, efault);
> + if (uval != cpu_id)
> + goto die;
> + unsafe_get_user(uval, &rseq->node_id, efault);
> + if (uval != node_id)
> + goto die;
> + unsafe_get_user(uval, &rseq->mm_cid, efault);
> + if (uval != t->rseq.ids.mm_cid)
> + goto die;
> + }
> +
> unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
> unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
> unsafe_put_user(node_id, &rseq->node_id, efault);
> @@ -543,6 +525,9 @@ bool rseq_set_ids_get_csaddr(struct task
> rseq_stat_inc(rseq_stats.ids);
> rseq_trace_update(t, ids);
> return true;
> +
> +die:
> + t->rseq.event.fatal = true;
> efault:
> return false;
> }
>