[PATCH v7 02/10] x86/bhi: Make clear_bhb_loop() effective on newer CPUs
From: Pawan Gupta
Date: Thu Mar 19 2026 - 11:42:32 EST
As a mitigation for BHI, clear_bhb_loop() executes branches that overwrite
the Branch History Buffer (BHB). On Alder Lake and newer parts this
sequence is not sufficient because it doesn't clear enough entries. This
was not an issue because these CPUs have a hardware control (BHI_DIS_S)
that mitigates BHI in the kernel.
The BHI variant of VMSCAPE requires isolating branch history between
guests and userspace. Note that there is no equivalent hardware control
for userspace. To effectively isolate branch history on newer CPUs,
clear_bhb_loop() should execute a sufficient number of branches to clear a
larger BHB.
Dynamically set the loop count of clear_bhb_loop() such that it is
effective on newer CPUs too. Use the hardware control enumeration
X86_FEATURE_BHI_CTRL to select the appropriate loop count.
Suggested-by: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Reviewed-by: Nikolay Borisov <nik.borisov@xxxxxxxx>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@xxxxxxxxxxxxxxx>
---
arch/x86/entry/entry_64.S | 21 ++++++++++++++++-----
arch/x86/net/bpf_jit_comp.c | 7 -------
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3a180a36ca0e..8128e00ca73f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1535,8 +1535,17 @@ SYM_CODE_END(rewind_stack_and_make_dead)
SYM_FUNC_START(clear_bhb_loop)
ANNOTATE_NOENDBR
push %rbp
+ /* BPF caller may require %rax to be preserved */
+ push %rax
mov %rsp, %rbp
- movl $5, %ecx
+
+ /*
+ * Between the long and short version of BHB clear sequence, just the
+ * loop count differs based on BHI_CTRL, see Intel's BHI guidance.
+ */
+ ALTERNATIVE "movb $5, %al", \
+ "movb $12, %al", X86_FEATURE_BHI_CTRL
+
ANNOTATE_INTRA_FUNCTION_CALL
call 1f
jmp 5f
@@ -1556,16 +1565,18 @@ SYM_FUNC_START(clear_bhb_loop)
* This should be ideally be: .skip 32 - (.Lret2 - 2f), 0xcc
* but some Clang versions (e.g. 18) don't like this.
*/
- .skip 32 - 18, 0xcc
-2: movl $5, %eax
+ .skip 32 - 14, 0xcc
+2: ALTERNATIVE "movb $5, %ah", \
+ "movb $7, %ah", X86_FEATURE_BHI_CTRL
3: jmp 4f
nop
-4: sub $1, %eax
+4: sub $1, %ah
jnz 3b
- sub $1, %ecx
+ sub $1, %al
jnz 1b
.Lret2: RET
5:
+ pop %rax
pop %rbp
RET
SYM_FUNC_END(clear_bhb_loop)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 63d6c9fa5e80..e2cceabb23e8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1614,11 +1614,6 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
u8 *func;
if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
- /* The clearing sequence clobbers eax and ecx. */
- EMIT1(0x50); /* push rax */
- EMIT1(0x51); /* push rcx */
- ip += 2;
-
func = (u8 *)clear_bhb_loop;
ip += x86_call_depth_emit_accounting(&prog, func, ip);
@@ -1626,8 +1621,6 @@ static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
return -EINVAL;
/* Don't speculate past this until BHB is cleared */
EMIT_LFENCE();
- EMIT1(0x59); /* pop rcx */
- EMIT1(0x58); /* pop rax */
}
/* Insert IBHF instruction */
if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&
--
2.34.1