[PATCH bpf-next 1/2] bpf, x86: patch tail-call fentry slot on non-IBT JITs

From: Takeru Hayasaka

Date: Fri Mar 27 2026 - 10:21:10 EST

On x86, fentry patching for tail-called programs mirrors every
BPF_MOD_CALL text poke at the regular entry onto the patch slot at the
tail-call landing.

The helper that locates that mirrored slot assumes an ENDBR-prefixed
landing, which works on IBT JITs but fails on non-IBT JITs where the
landing starts directly with the 5-byte patch slot.

As a result, the regular entry gets patched but the tail-call landing
is left as a 5-byte NOP, so fentry never fires for programs entered via
a tail call on non-IBT kernels.

Anchor the lookup on the landing address, first verify the 2-byte short
jump (0xEB) that immediately precedes the landing, and check for ENDBR
only when one is actually emitted (ENDBR_INSN_SIZE != 0).

Signed-off-by: Takeru Hayasaka <hayatake396@xxxxxxxxx>
---
arch/x86/net/bpf_jit_comp.c | 47 ++++++++++++++++++++++++++++++++++---
1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e9b78040d703..fe5fd37f65d8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,8 +325,10 @@ struct jit_context {

/* Number of bytes emit_patch() needs to generate instructions */
#define X86_PATCH_SIZE 5
+/* Number of bytes used by the short jump that skips the tail-call hook. */
+#define X86_TAIL_CALL_SKIP_JMP_SIZE 2
/* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE)
+#define X86_TAIL_CALL_OFFSET (12 + X86_TAIL_CALL_SKIP_JMP_SIZE + ENDBR_INSN_SIZE)

static void push_r9(u8 **pprog)
{
@@ -545,8 +547,15 @@ static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cb
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
}

+ if (!is_subprog) {
+ /* Normal entry skips the tail-call-only trampoline hook. */
+ EMIT2(0xEB, ENDBR_INSN_SIZE + X86_PATCH_SIZE);
+ }
+
/* X86_TAIL_CALL_OFFSET is here */
EMIT_ENDBR();
+ if (!is_subprog)
+ emit_nops(&prog, X86_PATCH_SIZE);

/* sub rsp, rounded_stack_depth */
if (stack_depth)
@@ -632,12 +641,33 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
return ret;
}

+static void *bpf_tail_call_fentry_ip(void *ip)
+{
+ u8 *tail_ip = ip + X86_TAIL_CALL_OFFSET;
+ u8 *landing = tail_ip - ENDBR_INSN_SIZE;
+
+ /* ip: entry fentry slot, past the entry ENDBR; expect the skip jmp (EB rel8) before landing. */
+ if (landing[-X86_TAIL_CALL_SKIP_JMP_SIZE] != 0xEB ||
+ landing[-X86_TAIL_CALL_SKIP_JMP_SIZE + 1] !=
+ ENDBR_INSN_SIZE + X86_PATCH_SIZE)
+ return NULL;
+
+ if (ENDBR_INSN_SIZE && !is_endbr((u32 *)landing))
+ return NULL;
+
+ return tail_ip;
+}
+
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
enum bpf_text_poke_type new_t, void *old_addr,
void *new_addr)
{
+ void *tail_ip = NULL;
+ bool is_bpf_text = is_bpf_text_address((long)ip);
+ int ret, tail_ret;
+
if (!is_kernel_text((long)ip) &&
- !is_bpf_text_address((long)ip))
+ !is_bpf_text)
/* BPF poking in modules is not supported */
return -EINVAL;

@@ -648,7 +678,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
if (is_endbr(ip))
ip += ENDBR_INSN_SIZE;

- return __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
+ if (is_bpf_text && (old_t == BPF_MOD_CALL || new_t == BPF_MOD_CALL))
+ tail_ip = bpf_tail_call_fentry_ip(ip);
+
+ ret = __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
+ if (ret < 0 || !tail_ip)
+ return ret;
+
+ tail_ret = __bpf_arch_text_poke(tail_ip, old_t, new_t, old_addr, new_addr);
+ if (tail_ret < 0)
+ return tail_ret;
+
+ return ret && tail_ret;
}

#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
--
2.43.0