[PATCH v2 06/10] ext4: move ordered data handling out of ext4_block_do_zero_range()
From: Zhang Yi
Date: Wed Mar 25 2026 - 03:35:08 EST
From: Zhang Yi <yi.zhang@xxxxxxxxxx>
Remove the handle parameter from ext4_block_do_zero_range() and move the
ordered data handling to ext4_block_zero_eof().
This is necessary for truncate up and append writes across a range
extending beyond EOF. The ordered data must be committed before updating
i_disksize to prevent exposing stale on-disk data from concurrent
post-EOF mmap writes during previous folio writeback or in case of
system crash during append writes.
This is unnecessary for partial block hole punching because the entire
punch operation does not provide atomicity guarantees and can already
expose intermediate results in case of crash.
Hole punching can only ever expose data that was there before the punch
but missed zeroing during append / truncate could expose data that was
not visible in the file before the operation.
Since ordered data handling is no longer performed inside
ext4_zero_partial_blocks(), ext4_punch_hole() no longer needs to attach
jinode.
This is prepared for the conversion to the iomap infrastructure, which
does not use ordered data mode while zeroing post-EOF partial blocks.
Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>
Reviewed-by: Jan Kara <jack@xxxxxxx>
---
fs/ext4/inode.c | 58 ++++++++++++++++++++++++-------------------------
1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3c3c07fd00ba..84dd3140793d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4075,12 +4075,12 @@ static struct buffer_head *ext4_load_tail_bh(struct inode *inode, loff_t from)
return err ? ERR_PTR(err) : NULL;
}
-static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode,
- loff_t from, loff_t length, bool *did_zero)
+static int ext4_block_do_zero_range(struct inode *inode, loff_t from,
+ loff_t length, bool *did_zero,
+ bool *zero_written)
{
struct buffer_head *bh;
struct folio *folio;
- int err = 0;
bh = ext4_load_tail_bh(inode, from);
if (IS_ERR_OR_NULL(bh))
@@ -4091,19 +4091,14 @@ static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode,
BUFFER_TRACE(bh, "zeroed end of block");
mark_buffer_dirty(bh);
- /*
- * Only the written block requires ordered data to prevent exposing
- * stale data.
- */
- if (ext4_should_order_data(inode) &&
- !buffer_unwritten(bh) && !buffer_delay(bh))
- err = ext4_jbd2_inode_add_write(handle, inode, from, length);
- if (!err && did_zero)
+ if (did_zero)
*did_zero = true;
+ if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh))
+ *zero_written = true;
folio_unlock(folio);
folio_put(folio);
- return err;
+ return 0;
}
static int ext4_block_journalled_zero_range(handle_t *handle,
@@ -4147,7 +4142,8 @@ static int ext4_block_journalled_zero_range(handle_t *handle,
* that corresponds to 'from'
*/
static int ext4_block_zero_range(handle_t *handle, struct inode *inode,
- loff_t from, loff_t length, bool *did_zero)
+ loff_t from, loff_t length, bool *did_zero,
+ bool *zero_written)
{
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned int max = blocksize - (from & (blocksize - 1));
@@ -4166,7 +4162,8 @@ static int ext4_block_zero_range(handle_t *handle, struct inode *inode,
return ext4_block_journalled_zero_range(handle, inode, from,
length, did_zero);
}
- return ext4_block_do_zero_range(handle, inode, from, length, did_zero);
+ return ext4_block_do_zero_range(inode, from, length, did_zero,
+ zero_written);
}
/*
@@ -4183,6 +4180,7 @@ int ext4_block_zero_eof(handle_t *handle, struct inode *inode,
unsigned int offset;
loff_t length = end - from;
bool did_zero = false;
+ bool zero_written = false;
int err;
offset = from & (blocksize - 1);
@@ -4195,9 +4193,22 @@ int ext4_block_zero_eof(handle_t *handle, struct inode *inode,
if (length > blocksize - offset)
length = blocksize - offset;
- err = ext4_block_zero_range(handle, inode, from, length, &did_zero);
+ err = ext4_block_zero_range(handle, inode, from, length,
+ &did_zero, &zero_written);
if (err)
return err;
+ /*
+ * It's necessary to order zeroed data before update i_disksize when
+ * truncating up or performing an append write, because there might be
+ * exposing stale on-disk data which may caused by concurrent post-EOF
+ * mmap write during folio writeback.
+ */
+ if (ext4_should_order_data(inode) &&
+ did_zero && zero_written && !IS_DAX(inode)) {
+ err = ext4_jbd2_inode_add_write(handle, inode, from, length);
+ if (err)
+ return err;
+ }
return did_zero ? length : 0;
}
@@ -4221,13 +4232,13 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
if (start == end &&
(partial_start || (partial_end != sb->s_blocksize - 1))) {
err = ext4_block_zero_range(handle, inode, lstart,
- length, NULL);
+ length, NULL, NULL);
return err;
}
/* Handle partial zero out on the start of the range */
if (partial_start) {
err = ext4_block_zero_range(handle, inode, lstart,
- sb->s_blocksize, NULL);
+ sb->s_blocksize, NULL, NULL);
if (err)
return err;
}
@@ -4235,7 +4246,7 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
if (partial_end != sb->s_blocksize - 1)
err = ext4_block_zero_range(handle, inode,
byte_end - partial_end,
- partial_end + 1, NULL);
+ partial_end + 1, NULL, NULL);
return err;
}
@@ -4410,17 +4421,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
end = max_end;
length = end - offset;
- /*
- * Attach jinode to inode for jbd2 if we do any zeroing of partial
- * block.
- */
- if (!IS_ALIGNED(offset | end, sb->s_blocksize)) {
- ret = ext4_inode_attach_jinode(inode);
- if (ret < 0)
- return ret;
- }
-
-
ret = ext4_update_disksize_before_punch(inode, offset, length);
if (ret)
return ret;
--
2.52.0