[RFC PATCH] riscv: add userspace interface to voluntarily release vector state

From: daichengrong

Date: Mon Mar 16 2026 - 22:23:55 EST


Vector registers in RVV can be large, and saving/restoring them on
context switches introduces overhead. Some workloads only use
vector instructions in short phases, after which the vector state
does not need to be preserved.

Introduce a userspace-controlled mechanism, exposed as the new
riscv_release_vector_register() system call:

- Userspace can declare that it no longer needs the vector state.
- The kernel skips saving and restoring the vector registers on context
  switch while the declaration is active.
- If the thread executes vector instructions after releasing its
  vector state, the kernel revokes the declaration automatically.

This reduces unnecessary vector context switch overhead and improves
performance in workloads with intermittent vector usage.

This is an RFC patch to solicit feedback on the API design and
implementation approach.

Signed-off-by: daichengrong <daichengrong@xxxxxxxxxxx>
---
arch/riscv/include/asm/processor.h | 1 +
arch/riscv/include/asm/syscall.h | 2 ++
arch/riscv/include/asm/vector.h | 7 +++++--
arch/riscv/kernel/process.c | 1 +
arch/riscv/kernel/sys_riscv.c | 12 ++++++++++++
scripts/syscall.tbl | 1 +
6 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 4c3dd94d0f63..b59f1456918b 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -113,6 +113,7 @@ struct thread_struct {
unsigned long envcfg;
unsigned long sum;
u32 riscv_v_flags;
+ unsigned long riscv_v_release_flags;
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 8067e666a4ca..f6be37b01a67 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -121,4 +121,6 @@ asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);

asmlinkage long sys_riscv_hwprobe(struct riscv_hwprobe *, size_t, size_t,
unsigned long *, unsigned int);
+/* Lets the calling thread declare that it no longer needs its vector state. */
+asmlinkage long sys_riscv_release_vector_register(void);
#endif /* _ASM_RISCV_SYSCALL_H */
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 00cb9c0982b1..4bccccc20cc3 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -309,6 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
if (__riscv_v_vstate_check(regs->status, DIRTY)) {
__riscv_v_vstate_save(vstate, vstate->datap);
__riscv_v_vstate_clean(regs);
+ WRITE_ONCE(current->thread.riscv_v_release_flags, 0);
}
}

@@ -325,8 +326,10 @@ static inline void riscv_v_vstate_set_restore(struct task_struct *task,
struct pt_regs *regs)
{
if (riscv_v_vstate_query(regs)) {
- set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
- riscv_v_vstate_on(regs);
+ if (!READ_ONCE(current->thread.riscv_v_release_flags)) {
+ set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
+ riscv_v_vstate_on(regs);
+ }
}
}

diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index aacb23978f93..f1f36a3c7914 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -279,6 +279,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.ra = (unsigned long)ret_from_fork_user_asm;
}
p->thread.riscv_v_flags = 0;
+ p->thread.riscv_v_release_flags = 0;
if (has_vector() || has_xtheadvector())
riscv_v_thread_alloc(p);
p->thread.sp = (unsigned long)childregs; /* kernel sp */
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 22fc9b3268be..934ddc06858d 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -8,6 +8,7 @@
#include <linux/syscalls.h>
#include <asm/cacheflush.h>
#include <asm-generic/mman-common.h>
+#include <asm/vector.h>

static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -78,6 +79,17 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
return 0;
}

+SYSCALL_DEFINE0(riscv_release_vector_register) /* thread declares it no longer needs its V state */
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ if (__riscv_v_vstate_check(regs->status, DIRTY))
+ __riscv_v_vstate_clean(regs); /* riscv_v_vstate_save() only saves DIRTY state */
+
+ WRITE_ONCE(current->thread.riscv_v_release_flags, 1); /* reset to 0 by riscv_v_vstate_save() on DIRTY */
+ return 0;
+}
+
/* Not defined using SYSCALL_DEFINE0 to avoid error injection */
asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *__unused)
{
diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
index 7a42b32b6577..1d0a493b87c3 100644
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -302,6 +302,7 @@

244 or1k or1k_atomic sys_or1k_atomic

+257 riscv riscv_release_vector_register sys_riscv_release_vector_register
258 riscv riscv_hwprobe sys_riscv_hwprobe
259 riscv riscv_flush_icache sys_riscv_flush_icache

--
2.25.1