Re: [RFC v5 6/7] ext4: fast commit: add lock_updates tracepoint
From: Li Chen
Date: Wed Mar 25 2026 - 02:17:27 EST
Hi Steven,
Thank you for your review, and I apologize for my delayed response.
---- On Wed, 18 Mar 2026 00:21:29 +0800 Steven Rostedt <rostedt@xxxxxxxxxxx> wrote ---
> On Tue, 17 Mar 2026 16:46:21 +0800
> Li Chen <me@linux.beauty> wrote:
>
> > Commit-time fast commit snapshots run under jbd2_journal_lock_updates(),
> > so it is useful to quantify the time spent with updates locked and to
> > understand why snapshotting can fail.
> >
> > Add a new tracepoint, ext4_fc_lock_updates, reporting the time spent in
> > the updates-locked window along with the number of snapshotted inodes
> > and ranges. Record the first snapshot failure reason in a stable snap_err
> > field for tooling.
> >
> > Signed-off-by: Li Chen <me@linux.beauty>
> > ---
> > fs/ext4/ext4.h | 15 ++++++++
> > fs/ext4/fast_commit.c | 71 +++++++++++++++++++++++++++++--------
> > include/trace/events/ext4.h | 61 +++++++++++++++++++++++++++++++
> > 3 files changed, 132 insertions(+), 15 deletions(-)
> >
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 68a64fa0be926..b9e146f3dd9e4 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -1037,6 +1037,21 @@ enum {
> >
> > struct ext4_fc_inode_snap;
> >
> > +/*
> > + * Snapshot failure reasons for ext4_fc_lock_updates tracepoint.
> > + * Keep these stable for tooling.
> > + */
> > +enum ext4_fc_snap_err {
> > + EXT4_FC_SNAP_ERR_NONE = 0,
> > + EXT4_FC_SNAP_ERR_ES_MISS = 1,
> > + EXT4_FC_SNAP_ERR_ES_DELAYED = 2,
> > + EXT4_FC_SNAP_ERR_ES_OTHER = 3,
> > + EXT4_FC_SNAP_ERR_INODES_CAP = 4,
> > + EXT4_FC_SNAP_ERR_RANGES_CAP = 5,
> > + EXT4_FC_SNAP_ERR_NOMEM = 6,
> > + EXT4_FC_SNAP_ERR_INODE_LOC = 7,
>
> You don't need to explicitly state the assignments, the enum will increment
> them without them.
Agree.
> > +};
> > +
> > /*
> > * fourth extended file system inode data in memory
> > */
> > diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> > index d1eefee609120..4929e2990b292 100644
> > --- a/fs/ext4/fast_commit.c
> > +++ b/fs/ext4/fast_commit.c
> > @@ -193,6 +193,12 @@ static struct kmem_cache *ext4_fc_range_cachep;
> > #define EXT4_FC_SNAPSHOT_MAX_INODES 1024
> > #define EXT4_FC_SNAPSHOT_MAX_RANGES 2048
> >
> > +static inline void ext4_fc_set_snap_err(int *snap_err, int err)
> > +{
> > + if (snap_err && *snap_err == EXT4_FC_SNAP_ERR_NONE)
> > + *snap_err = err;
> > +}
> > +
> > static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
> > {
> > BUFFER_TRACE(bh, "");
> > @@ -983,11 +989,12 @@ static void ext4_fc_free_inode_snap(struct inode *inode)
> > static int ext4_fc_snapshot_inode_data(struct inode *inode,
> > struct list_head *ranges,
> > unsigned int nr_ranges_total,
> > - unsigned int *nr_rangesp)
> > + unsigned int *nr_rangesp,
> > + int *snap_err)
> > {
> > struct ext4_inode_info *ei = EXT4_I(inode);
> > - unsigned int nr_ranges = 0;
> > ext4_lblk_t start_lblk, end_lblk, cur_lblk;
> > + unsigned int nr_ranges = 0;
> >
> > spin_lock(&ei->i_fc_lock);
> > if (ei->i_fc_lblk_len == 0) {
> > @@ -1010,11 +1017,16 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
> > struct ext4_fc_range *range;
> > ext4_lblk_t len;
> >
> > - if (!ext4_es_lookup_extent(inode, cur_lblk, NULL, &es, NULL))
> > + if (!ext4_es_lookup_extent(inode, cur_lblk, NULL, &es, NULL)) {
> > + ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_ES_MISS);
> > return -EAGAIN;
> > + }
> >
> > - if (ext4_es_is_delayed(&es))
> > + if (ext4_es_is_delayed(&es)) {
> > + ext4_fc_set_snap_err(snap_err,
> > + EXT4_FC_SNAP_ERR_ES_DELAYED);
> > return -EAGAIN;
> > + }
> >
> > len = es.es_len - (cur_lblk - es.es_lblk);
> > if (len > end_lblk - cur_lblk + 1)
> > @@ -1024,12 +1036,17 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
> > continue;
> > }
> >
> > - if (nr_ranges_total + nr_ranges >= EXT4_FC_SNAPSHOT_MAX_RANGES)
> > + if (nr_ranges_total + nr_ranges >= EXT4_FC_SNAPSHOT_MAX_RANGES) {
> > + ext4_fc_set_snap_err(snap_err,
> > + EXT4_FC_SNAP_ERR_RANGES_CAP);
> > return -E2BIG;
> > + }
> >
> > range = kmem_cache_alloc(ext4_fc_range_cachep, GFP_NOFS);
> > - if (!range)
> > + if (!range) {
> > + ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_NOMEM);
> > return -ENOMEM;
> > + }
> > nr_ranges++;
> >
> > range->lblk = cur_lblk;
> > @@ -1054,6 +1071,7 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
> > range->len = max;
> > } else {
> > kmem_cache_free(ext4_fc_range_cachep, range);
> > + ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_ES_OTHER);
> > return -EAGAIN;
> > }
> >
> > @@ -1070,7 +1088,7 @@ static int ext4_fc_snapshot_inode_data(struct inode *inode,
> >
> > static int ext4_fc_snapshot_inode(struct inode *inode,
> > unsigned int nr_ranges_total,
> > - unsigned int *nr_rangesp)
> > + unsigned int *nr_rangesp, int *snap_err)
> > {
> > struct ext4_inode_info *ei = EXT4_I(inode);
> > struct ext4_fc_inode_snap *snap;
> > @@ -1082,8 +1100,10 @@ static int ext4_fc_snapshot_inode(struct inode *inode,
> > int alloc_ctx;
> >
> > ret = ext4_get_inode_loc_noio(inode, &iloc);
> > - if (ret)
> > + if (ret) {
> > + ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_INODE_LOC);
> > return ret;
> > + }
> >
> > if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
> > inode_len = EXT4_INODE_SIZE(inode->i_sb);
> > @@ -1092,6 +1112,7 @@ static int ext4_fc_snapshot_inode(struct inode *inode,
> >
> > snap = kmalloc(struct_size(snap, inode_buf, inode_len), GFP_NOFS);
> > if (!snap) {
> > + ext4_fc_set_snap_err(snap_err, EXT4_FC_SNAP_ERR_NOMEM);
> > brelse(iloc.bh);
> > return -ENOMEM;
> > }
> > @@ -1102,7 +1123,7 @@ static int ext4_fc_snapshot_inode(struct inode *inode,
> > brelse(iloc.bh);
> >
> > ret = ext4_fc_snapshot_inode_data(inode, &ranges, nr_ranges_total,
> > - &nr_ranges);
> > + &nr_ranges, snap_err);
> > if (ret) {
> > kfree(snap);
> > ext4_fc_free_ranges(&ranges);
> > @@ -1203,7 +1224,10 @@ static int ext4_fc_alloc_snapshot_inodes(struct super_block *sb,
> > unsigned int *nr_inodesp);
> >
> > static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
> > - unsigned int inodes_size)
> > + unsigned int inodes_size,
> > + unsigned int *nr_inodesp,
> > + unsigned int *nr_rangesp,
> > + int *snap_err)
> > {
> > struct super_block *sb = journal->j_private;
> > struct ext4_sb_info *sbi = EXT4_SB(sb);
> > @@ -1221,6 +1245,8 @@ static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
> > alloc_ctx = ext4_fc_lock(sb);
> > list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
> > if (i >= inodes_size) {
> > + ext4_fc_set_snap_err(snap_err,
> > + EXT4_FC_SNAP_ERR_INODES_CAP);
> > ret = -E2BIG;
> > goto unlock;
> > }
> > @@ -1244,6 +1270,8 @@ static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
> > continue;
> >
> > if (i >= inodes_size) {
> > + ext4_fc_set_snap_err(snap_err,
> > + EXT4_FC_SNAP_ERR_INODES_CAP);
> > ret = -E2BIG;
> > goto unlock;
> > }
> > @@ -1268,16 +1296,20 @@ static int ext4_fc_snapshot_inodes(journal_t *journal, struct inode **inodes,
> > unsigned int inode_ranges = 0;
> >
> > ret = ext4_fc_snapshot_inode(inodes[idx], nr_ranges,
> > - &inode_ranges);
> > + &inode_ranges, snap_err);
> > if (ret)
> > break;
> > nr_ranges += inode_ranges;
> > }
> >
> > + if (nr_inodesp)
> > + *nr_inodesp = i;
> > + if (nr_rangesp)
> > + *nr_rangesp = nr_ranges;
> > return ret;
> > }
> >
> > -static int ext4_fc_perform_commit(journal_t *journal)
> > +static int ext4_fc_perform_commit(journal_t *journal, tid_t commit_tid)
> > {
> > struct super_block *sb = journal->j_private;
> > struct ext4_sb_info *sbi = EXT4_SB(sb);
> > @@ -1286,10 +1318,15 @@ static int ext4_fc_perform_commit(journal_t *journal)
> > struct inode *inode;
> > struct inode **inodes;
> > unsigned int inodes_size;
> > + unsigned int snap_inodes = 0;
> > + unsigned int snap_ranges = 0;
> > + int snap_err = EXT4_FC_SNAP_ERR_NONE;
> > struct blk_plug plug;
> > int ret = 0;
> > u32 crc = 0;
> > int alloc_ctx;
> > + ktime_t lock_start;
> > + u64 locked_ns;
> >
> > /*
> > * Step 1: Mark all inodes on s_fc_q[MAIN] with
> > @@ -1337,13 +1374,13 @@ static int ext4_fc_perform_commit(journal_t *journal)
> > if (ret)
> > return ret;
> >
> > -
> > ret = ext4_fc_alloc_snapshot_inodes(sb, &inodes, &inodes_size);
> > if (ret)
> > return ret;
> >
> > /* Step 4: Mark all inodes as being committed. */
> > jbd2_journal_lock_updates(journal);
> > + lock_start = ktime_get();
> > /*
> > * The journal is now locked. No more handles can start and all the
> > * previous handles are now drained. Snapshotting happens in this
> > @@ -1357,8 +1394,12 @@ static int ext4_fc_perform_commit(journal_t *journal)
> > }
> > ext4_fc_unlock(sb, alloc_ctx);
> >
> > - ret = ext4_fc_snapshot_inodes(journal, inodes, inodes_size);
> > + ret = ext4_fc_snapshot_inodes(journal, inodes, inodes_size,
> > + &snap_inodes, &snap_ranges, &snap_err);
> > jbd2_journal_unlock_updates(journal);
> > + locked_ns = ktime_to_ns(ktime_sub(ktime_get(), lock_start));
>
> If locked_ns is only used for the tracepoint, it should either be
> calculated in the tracepoint, or add:
>
> if (trace_ext4_fc_lock_updates_enabled()) {
> locked_ns = ktime_to_ns(ktime_sub(ktime_get(), lock_start));
Good catch!
> > + trace_ext4_fc_lock_updates(sb, commit_tid, locked_ns, snap_inodes,
> > + snap_ranges, ret, snap_err);
>
> }
>
> Note, we are going to also add a code to call the tracepoint directly, to
> remove the double static_branch.
>
> https://lore.kernel.org/all/20260312150523.2054552-1-vineeth@xxxxxxxxxxxxxxx/
>
> But that code is still being worked on so you don't need to worry about it
> at the moment.
Thank you! I will monitor this patch set and incorporate it into future versions if possible.
>
> > kvfree(inodes);
> > if (ret)
> > return ret;
> > @@ -1563,7 +1604,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
> > journal_ioprio = EXT4_DEF_JOURNAL_IOPRIO;
> > set_task_ioprio(current, journal_ioprio);
> > fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
> > - ret = ext4_fc_perform_commit(journal);
> > + ret = ext4_fc_perform_commit(journal, commit_tid);
> > if (ret < 0) {
> > if (ret == -EAGAIN || ret == -E2BIG || ret == -ECANCELED)
> > status = EXT4_FC_STATUS_INELIGIBLE;
> > diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> > index fd76d14c2776e..dc084f39b74ad 100644
> > --- a/include/trace/events/ext4.h
> > +++ b/include/trace/events/ext4.h
> > @@ -104,6 +104,26 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_INODE_JOURNAL_DATA);
> > TRACE_DEFINE_ENUM(EXT4_FC_REASON_ENCRYPTED_FILENAME);
> > TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
> >
> > +#undef EM
> > +#undef EMe
> > +#define EM(a) TRACE_DEFINE_ENUM(EXT4_FC_SNAP_ERR_##a);
> > +#define EMe(a) TRACE_DEFINE_ENUM(EXT4_FC_SNAP_ERR_##a);
> > +
> > +#define TRACE_SNAP_ERR \
> > + EM(NONE) \
> > + EM(ES_MISS) \
> > + EM(ES_DELAYED) \
> > + EM(ES_OTHER) \
> > + EM(INODES_CAP) \
> > + EM(RANGES_CAP) \
> > + EM(NOMEM) \
> > + EMe(INODE_LOC)
> > +
> > +TRACE_SNAP_ERR
> > +
> > +#undef EM
> > +#undef EMe
> > +
> > #define show_fc_reason(reason) \
> > __print_symbolic(reason, \
> > { EXT4_FC_REASON_XATTR, "XATTR"}, \
> > @@ -2812,6 +2832,47 @@ TRACE_EVENT(ext4_fc_commit_stop,
> > __entry->num_fc_ineligible, __entry->nblks_agg, __entry->tid)
> > );
> >
> > +#define EM(a) { EXT4_FC_SNAP_ERR_##a, #a },
> > +#define EMe(a) { EXT4_FC_SNAP_ERR_##a, #a }
> > +
> > +TRACE_EVENT(ext4_fc_lock_updates,
> > + TP_PROTO(struct super_block *sb, tid_t commit_tid, u64 locked_ns,
> > + unsigned int nr_inodes, unsigned int nr_ranges, int err,
> > + int snap_err),
> > +
> > + TP_ARGS(sb, commit_tid, locked_ns, nr_inodes, nr_ranges, err, snap_err),
> > +
> > + TP_STRUCT__entry(/* entry */
> > + __field(dev_t, dev)
> > + __field(tid_t, tid)
> > + __field(u64, locked_ns)
> > + __field(unsigned int, nr_inodes)
> > + __field(unsigned int, nr_ranges)
> > + __field(int, err)
> > + __field(int, snap_err)
> > + ),
> > +
> > + TP_fast_assign(/* assign */
> > + __entry->dev = sb->s_dev;
> > + __entry->tid = commit_tid;
> > + __entry->locked_ns = locked_ns;
> > + __entry->nr_inodes = nr_inodes;
> > + __entry->nr_ranges = nr_ranges;
> > + __entry->err = err;
> > + __entry->snap_err = snap_err;
> > + ),
> > +
> > + TP_printk("dev %d,%d tid %u locked_ns %llu nr_inodes %u nr_ranges %u err %d snap_err %s",
> > + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
> > + __entry->locked_ns, __entry->nr_inodes, __entry->nr_ranges,
> > + __entry->err, __print_symbolic(__entry->snap_err,
> > + TRACE_SNAP_ERR))
> > +);
> > +
> > +#undef EM
> > +#undef EMe
> > +#undef TRACE_SNAP_ERR
> > +
> > #define FC_REASON_NAME_STAT(reason) \
> > show_fc_reason(reason), \
> > __entry->fc_ineligible_rc[reason]
>
>
Regards,
Li