Re: lockup and kernel panic in linux-next-202505{09,12} when compiled with clang

From: Bert Karwatzki
Date: Sat May 17 2025 - 07:40:10 EST


Am Freitag, dem 16.05.2025 um 20:19 +0200 schrieb Bert Karwatzki:
> I've added a debugging statement:
>
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index 3bd5ee0995fe..853493eca4f5 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -4586,7 +4586,11 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb,
> struct ieee80211_local *local,
> struct ieee80211_tx_info *info)
> {
> - if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) {
> + if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
> + sock_flag(skb->sk, SOCK_WIFI_STATUS)))) {
> + if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS))
> + printk(KERN_INFO "%s: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = %u sock_flag(skb->sk,
> SOCK_WIFI_STATUS) = %u\n",
> + __func__, (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS), sock_flag(skb->sk,
> SOCK_WIFI_STATUS));
> info->status_data = ieee80211_store_ack_skb(local, skb,
> &info->flags, NULL);
> if (info->status_data)
>
> This gives the following log output (and a lockup), indicating that sock_flag(skb->sk, SOCK_WIFI_STATUS) and
> (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) are actually NOT equivalent (when compiled with clang and
> PREEMPT_RT=y).

I've added more debugging output:

diff --git a/include/net/sock.h b/include/net/sock.h
index e223102337c7..e13560b5b7a8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2735,8 +2735,10 @@ static inline void _sock_tx_timestamp(struct sock *sk,
*tskey = atomic_inc_return(&sk->sk_tskey) - 1;
}
}
- if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
+ if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) {
+ printk(KERN_INFO "%s: setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk);
*tx_flags |= SKBTX_WIFI_STATUS;
+ }
}

static inline void sock_tx_timestamp(struct sock *sk,
diff --git a/net/core/sock.c b/net/core/sock.c
index e02a78538e3e..f6589ad5ba36 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1548,6 +1548,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
break;

case SO_WIFI_STATUS:
+ printk(KERN_INFO "%s: setting SOCK_WIFI_STATUS to %u for sk = %px\n", __func__, valbool, sk);
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
break;

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 853493eca4f5..eee2f80949c6 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4588,9 +4588,12 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb,
{
if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
sock_flag(skb->sk, SOCK_WIFI_STATUS)))) {
- if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS))
+ if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) {
printk(KERN_INFO "%s: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = %u sock_flag(skb->sk, SOCK_WIFI_STATUS) = %u\n",
__func__, (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS), sock_flag(skb->sk, SOCK_WIFI_STATUS));
+ printk(KERN_INFO "%s: skb->sk = %px skb->sk->sk_flags = 0x%lx\n", __func__, skb->sk, skb->sk->sk_flags);
+ return; // This should make this case non-fatal.
+ }
info->status_data = ieee80211_store_ack_skb(local, skb,
&info->flags, NULL);
if (info->status_data)



This gives the following output after ~15 min of uptime:

[ 189.337797] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 189.337803] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1b798c4e00 skb->sk->sk_flags = 0xffffffffb4efe640
[ 191.325256] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 191.325259] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1b798c5a00 skb->sk->sk_flags = 0xffffffffb4efe640
[ 257.591831] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 257.591844] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1baf3bca00 skb->sk->sk_flags = 0xffffffffb4efe640
[ 301.786963] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 301.786967] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1c1bc40100 skb->sk->sk_flags = 0xffffffffb4efe640
[ 302.780881] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 302.780884] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1a44cf6000 skb->sk->sk_flags = 0xffffffffb4efe640
[ 482.792298] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 482.792304] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4de00 skb->sk->sk_flags = 0xffffffffb4efe640
[ 482.806144] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 482.806148] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4c500 skb->sk->sk_flags = 0xffffffffb4efe640
[ 482.817280] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 482.817284] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4df00 skb->sk->sk_flags = 0xffffffffb4efe640
[ 552.327291] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 552.327295] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4de00 skb->sk->sk_flags = 0xffffffffb4efe640
[ 916.971599] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
[ 916.971607] [ T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1a62834000 skb->sk->sk_flags = 0xffffffffb4efe640

The printk()s in sk_setsockopt() and _sock_tx_timestamp() are not called at all, so the flag
SOCK_WIFI_STATUS is actually never set! What is printed when printing skb->sk->sk_flags looks
suspiciously like a pointer, and as sk_flags (i.e. skc_flags) is actually a member of a union in
struct sock_common, it seems clang is using sk_flags for one of the other union members here:

struct sock_common {
[...]
union {
unsigned long skc_flags;
struct sock *skc_listener; /* request_sock */
struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
};
[...]
}

Bert Karwatzki