[PATCH 1/2] nfsd: clear cl_cb_session on DESTROY_SESSION

From: Chris Mason

Date: Tue May 19 2026 - 14:04:33 EST


The parent commit drains in-flight backchannel callbacks before
nfsd4_destroy_session() frees the session, which closes the primary
use-after-free. clp->cl_cb_session still caches the freed pointer
though: no destroy, expire, or shutdown path clears it, so the
stale value survives until the next CREATE_SESSION overwrites it.

A backchannel dispatch that races in during that window dereferences
the freed session through one of three helpers:

nfsd41_cb_get_slot()
ses = clp->cl_cb_session;
grab_slot(ses); /* deref */

nfsd41_cb_release_slot()
ses = clp->cl_cb_session;
spin_lock(&ses->se_lock); /* deref */

nfsd4_cb_sequence_done()
session = cb->cb_clp->cl_cb_session;
session->se_cb_seq_nr[...]; /* deref */

Fix by clearing the cached pointer in nfsd4_destroy_session() after
the inflight drain returns, under clp->cl_lock and only when the
session being destroyed is still the active backchannel session.
Pair the store with READ_ONCE() loads and NULL checks in the three
helpers so each takes its early-return / requeue path.

The encode and decode helpers also read cl_cb_session but only from
in-flight RPCs, which nfsd4_probe_callback_sync() has already
quiesced; the nfsd41_cb_get_slot() NULL check then blocks any new
RPC from acquiring a slot, so no fresh CB can reach encode/decode.
Without the WRITE_ONCE() store the NULL guards would be unreachable.

Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@xxxxxxxx>
---
fs/nfsd/nfs4callback.c | 24 ++++++++++++++++--------
fs/nfsd/nfs4state.c | 14 ++++++++++++++
fs/nfsd/trace.h | 35 ++++++++++++++++++++---------------
3 files changed, 50 insertions(+), 23 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 50827405468d..8af2d0cc37c2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1150,9 +1150,11 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
} else {
if (!conn->cb_xprt || !ses)
return -EINVAL;
- clp->cl_cb_session = ses;
+ spin_lock(&clp->cl_lock);
+ WRITE_ONCE(clp->cl_cb_session, ses);
+ spin_unlock(&clp->cl_lock);
args.bc_xprt = conn->cb_xprt;
- args.prognumber = clp->cl_cb_session->se_cb_prog;
+ args.prognumber = ses->se_cb_prog;
args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
XPRT_TRANSPORT_BC;
args.authflavor = ses->se_cb_sec.flavor;
@@ -1278,10 +1280,12 @@ static int grab_slot(struct nfsd4_session *ses)
static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd4_session *ses = clp->cl_cb_session;
+ struct nfsd4_session *ses = READ_ONCE(clp->cl_cb_session);

if (cb->cb_held_slot >= 0)
return true;
+ if (!ses)
+ return false;
cb->cb_held_slot = grab_slot(ses);
if (cb->cb_held_slot < 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
@@ -1297,12 +1301,14 @@ static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd4_session *ses = clp->cl_cb_session;
+ struct nfsd4_session *ses = READ_ONCE(clp->cl_cb_session);

if (cb->cb_held_slot >= 0) {
- spin_lock(&ses->se_lock);
- ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
- spin_unlock(&ses->se_lock);
+ if (ses) {
+ spin_lock(&ses->se_lock);
+ ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
+ spin_unlock(&ses->se_lock);
+ }
cb->cb_held_slot = -1;
rpc_wake_up_next(&clp->cl_cb_waitq);
}
@@ -1442,9 +1448,11 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
/* Returns true if CB_COMPOUND processing should continue */
static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
{
- struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ struct nfsd4_session *session = READ_ONCE(cb->cb_clp->cl_cb_session);
bool ret = false;

+ if (!session)
+ goto requeue;
if (cb->cb_held_slot < 0)
goto requeue;

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6837b63d9864..563af61796e3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4281,6 +4281,20 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_compound_state *cstate,

nfsd4_probe_callback_sync(ses->se_client);

+ /*
+ * The inflight drain inside nfsd4_probe_callback_sync() guarantees
+ * that no backchannel RPC still references @ses. Clear the client's
+ * cached backchannel session pointer so any callback that races in
+ * after this point (e.g. via a freshly re-established backchannel)
+ * cannot dereference the about-to-be-freed session. The matching
+ * READ_ONCE() NULL checks live in nfsd41_cb_get_slot(),
+ * nfsd41_cb_release_slot() and nfsd4_cb_sequence_done().
+ */
+ spin_lock(&ses->se_client->cl_lock);
+ if (ses->se_client->cl_cb_session == ses)
+ WRITE_ONCE(ses->se_client->cl_cb_session, NULL);
+ spin_unlock(&ses->se_client->cl_lock);
+
spin_lock(&nn->client_lock);
status = nfs_ok;
out_put_session:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 5ad38f50836d..db9b10978b2d 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1742,17 +1742,19 @@ TRACE_EVENT(nfsd_cb_seq_status,
),
TP_fast_assign(
const struct nfs4_client *clp = cb->cb_clp;
- const struct nfsd4_session *session = clp->cl_cb_session;
- const struct nfsd4_sessionid *sid =
- (struct nfsd4_sessionid *)&session->se_sessionid;
+ const struct nfsd4_session *session =
+ READ_ONCE(clp->cl_cb_session);
+ const struct nfsd4_sessionid *sid = session ?
+ (struct nfsd4_sessionid *)&session->se_sessionid :
+ NULL;

__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
- __entry->cl_boot = sid->clientid.cl_boot;
- __entry->cl_id = sid->clientid.cl_id;
- __entry->seqno = sid->sequence;
- __entry->reserved = sid->reserved;
+ __entry->cl_boot = sid ? sid->clientid.cl_boot : 0;
+ __entry->cl_id = sid ? sid->clientid.cl_id : 0;
+ __entry->seqno = sid ? sid->sequence : 0;
+ __entry->reserved = sid ? sid->reserved : 0;
__entry->tk_status = task->tk_status;
__entry->seq_status = cb->cb_seq_status;
),
@@ -1782,18 +1784,21 @@ TRACE_EVENT(nfsd_cb_free_slot,
),
TP_fast_assign(
const struct nfs4_client *clp = cb->cb_clp;
- const struct nfsd4_session *session = clp->cl_cb_session;
- const struct nfsd4_sessionid *sid =
- (struct nfsd4_sessionid *)&session->se_sessionid;
+ const struct nfsd4_session *session =
+ READ_ONCE(clp->cl_cb_session);
+ const struct nfsd4_sessionid *sid = session ?
+ (struct nfsd4_sessionid *)&session->se_sessionid :
+ NULL;

__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client ?
task->tk_client->cl_clid : -1;
- __entry->cl_boot = sid->clientid.cl_boot;
- __entry->cl_id = sid->clientid.cl_id;
- __entry->seqno = sid->sequence;
- __entry->reserved = sid->reserved;
- __entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
+ __entry->cl_boot = sid ? sid->clientid.cl_boot : 0;
+ __entry->cl_id = sid ? sid->clientid.cl_id : 0;
+ __entry->seqno = sid ? sid->sequence : 0;
+ __entry->reserved = sid ? sid->reserved : 0;
+ __entry->slot_seqno = session ?
+ session->se_cb_seq_nr[cb->cb_held_slot] : 0;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
" sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
--
2.52.0