Re: [PATCH v2 4/5] bpf: allow new DECAP flags and add guard rails

From: Martin KaFai Lau

Date: Tue Mar 24 2026 - 14:36:00 EST




On 3/18/26 6:42 AM, Nick Hudson wrote:
Add checks to require shrink-only decap, reject conflicting decap flag
combinations, and verify removed length is sufficient for claimed header
decapsulation.

Co-developed-by: Max Tottenham <mtottenh@xxxxxxxxxx>
Signed-off-by: Max Tottenham <mtottenh@xxxxxxxxxx>
Co-developed-by: Anna Glasgall <aglasgal@xxxxxxxxxx>
Signed-off-by: Anna Glasgall <aglasgal@xxxxxxxxxx>
Signed-off-by: Nick Hudson <nhudson@xxxxxxxxxx>
---
net/core/filter.c | 47 +++++++++++++++++++++++++++++++++++++----------
1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 7c2871b40fe4..47aec44a9cd3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -56,6 +56,7 @@
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
+#include <net/gre.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
@@ -3496,7 +3497,9 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
BPF_F_ADJ_ROOM_ENCAP_L2( \
BPF_ADJ_ROOM_ENCAP_L2_MASK))
-#define BPF_F_ADJ_ROOM_DECAP_MASK (BPF_F_ADJ_ROOM_DECAP_L3_MASK)
+#define BPF_F_ADJ_ROOM_DECAP_MASK (BPF_F_ADJ_ROOM_DECAP_L3_MASK | \
+ BPF_F_ADJ_ROOM_DECAP_L4_MASK | \
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_MASK | \
@@ -3743,20 +3746,44 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}
- if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ if (flags & BPF_F_ADJ_ROOM_DECAP_MASK) {

This change should be done together with the macro refactoring patch mentioned in patch 3.

+ u32 len_decap_min = 0;
+
if (!shrink)
return -EINVAL;
- switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
- case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+ /* Reject mutually exclusive decap flag pairs. */
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) ==
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK)

If I understand correctly, this 'if' and the len_min assignment changes below replace the existing switch statement. Please separate this no-op change from the new flag validation logic. It is small enough that it can also be done in the macro refactoring patch.

+ return -EINVAL;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) ==
+ BPF_F_ADJ_ROOM_DECAP_L4_MASK)
+ return -EINVAL;
+
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK) ==
+ BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)
+ return -EINVAL;
+
+ /* Reject mutually exclusive decap tunnel type flags. */
+ if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_MASK) &&
+ (flags & BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK))
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP)
+ len_decap_min += sizeof(struct udphdr);
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE)
+ len_decap_min += sizeof(struct gre_base_hdr);
+
+ if (len_diff_abs < len_decap_min)
+ return -EINVAL;
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
len_min = sizeof(struct iphdr);
- break;
- case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
len_min = sizeof(struct ipv6hdr);
- break;
- default:
- return -EINVAL;
- }
}
len_cur = skb->len - skb_network_offset(skb);