Re: [PATCH v8 02/10] x86/bhi: Make clear_bhb_loop() effective on newer CPUs

From: Borislav Petkov

Date: Tue Mar 24 2026 - 17:00:42 EST


On Tue, Mar 24, 2026 at 11:16:51AM -0700, Pawan Gupta wrote:
> As a mitigation for BHI, clear_bhb_loop() executes branches that overwrite
> the Branch History Buffer (BHB). On Alder Lake and newer parts this
> sequence is not sufficient because it doesn't clear enough entries. This
> was not an issue because these CPUs have a hardware control (BHI_DIS_S)
> that mitigates BHI in the kernel.
>
> The BHI variant of VMSCAPE requires isolating branch history between guests
> and userspace. Note that there is no equivalent hardware control for userspace.
> To effectively isolate branch history on newer CPUs, clear_bhb_loop()
> should execute a sufficient number of branches to clear a larger BHB.
>
> Dynamically set the loop count of clear_bhb_loop() such that it is
> effective on newer CPUs too. Use the hardware control enumeration
> X86_FEATURE_BHI_CTRL to select the appropriate loop count.
>
> Suggested-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
> Reviewed-by: Nikolay Borisov <nik.borisov@xxxxxxxx>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@xxxxxxxxxxxxxxx>
> ---
> arch/x86/entry/entry_64.S | 21 ++++++++++++++++-----
> arch/x86/net/bpf_jit_comp.c | 7 -------
> 2 files changed, 16 insertions(+), 12 deletions(-)

Ok, please tell me why the diff below doesn't work.

The additional indirection makes even the BHB loop code simpler.

(I didn't pay too much attention to the labels; 2: is probably weird there.)

---

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3a180a36ca0e..95c7ed9afbbe 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1532,11 +1532,13 @@ SYM_CODE_END(rewind_stack_and_make_dead)
* Note, callers should use a speculation barrier like LFENCE immediately after
* a call to this function to ensure BHB is cleared before indirect branches.
*/
-SYM_FUNC_START(clear_bhb_loop)
+SYM_FUNC_START(__clear_bhb_loop)
ANNOTATE_NOENDBR
push %rbp
+ /* BPF caller may require %rax to be preserved */
+ push %rax
mov %rsp, %rbp
- movl $5, %ecx
+
ANNOTATE_INTRA_FUNCTION_CALL
call 1f
jmp 5f
@@ -1557,17 +1559,17 @@ SYM_FUNC_START(clear_bhb_loop)
* but some Clang versions (e.g. 18) don't like this.
*/
.skip 32 - 18, 0xcc
-2: movl $5, %eax
+2:
3: jmp 4f
nop
-4: sub $1, %eax
+4: sub $1, %rsi
jnz 3b
- sub $1, %ecx
+ sub $1, %rdi
jnz 1b
.Lret2: RET
5:
+ pop %rax
pop %rbp
RET
-SYM_FUNC_END(clear_bhb_loop)
-EXPORT_SYMBOL_FOR_KVM(clear_bhb_loop)
-STACK_FRAME_NON_STANDARD(clear_bhb_loop)
+SYM_FUNC_END(__clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(__clear_bhb_loop)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 70b377fcbc1c..a9f406941e11 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -390,6 +390,7 @@ extern void write_ibpb(void);

#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
+extern void __clear_bhb_loop(unsigned int a, unsigned int b);
#endif

extern void (*x86_return_thunk)(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 83f51cab0b1e..c41b0548cf2a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -3735,3 +3735,11 @@ void __warn_thunk(void)
{
WARN_ONCE(1, "Unpatched return thunk in use. This should not happen!\n");
}
+
+void clear_bhb_loop(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_BHI_CTRL))
+ __clear_bhb_loop(12, 7);
+ else
+ __clear_bhb_loop(5, 5);
+}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 63d6c9fa5e80..e2cceabb23e8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1614,11 +1614,6 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
u8 *func;

if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
- /* The clearing sequence clobbers eax and ecx. */
- EMIT1(0x50); /* push rax */
- EMIT1(0x51); /* push rcx */
- ip += 2;
-
func = (u8 *)clear_bhb_loop;
ip += x86_call_depth_emit_accounting(&prog, func, ip);

@@ -1626,8 +1621,6 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
return -EINVAL;
/* Don't speculate past this until BHB is cleared */
EMIT_LFENCE();
- EMIT1(0x59); /* pop rcx */
- EMIT1(0x58); /* pop rax */
}
/* Insert IBHF instruction */
if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette