Re: [PATCH 06/10] ext4: move ordered data handling out of ext4_block_do_zero_range()
From: Jan Kara
Date: Mon Mar 23 2026 - 13:54:54 EST
On Tue 10-03-26 09:40:57, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@xxxxxxxxxx>
>
> Remove the handle parameter from ext4_block_do_zero_range() and move the
> ordered data handling to ext4_block_zero_eof().
>
> This is necessary for truncate up and append writes across a range
> extending beyond EOF. The ordered data must be committed before updating
> i_disksize to prevent exposing stale on-disk data from concurrent
> post-EOF mmap writes during previous folio writeback or in case of
> system crash during append writes.
>
> This is unnecessary for partial block hole punching because the entire
> punch operation does not provide atomicity guarantees and can already
> expose intermediate results in case of crash.
Also because hole punching can only ever expose data that was there before
the punch but missed zeroing during append / truncate could expose data
that was not visible in the file before the operation.
> Since ordered data handling is no longer performed inside
> ext4_zero_partial_blocks(), ext4_punch_hole() no longer needs to attach
> jinode.
>
> This is prepared for the conversion to the iomap infrastructure, which
> does not use ordered data mode while zeroing post-EOF partial blocks.
>
> Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>
The patch looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@xxxxxxx>
Honza
> ---
> fs/ext4/inode.c | 58 ++++++++++++++++++++++++-------------------------
> 1 file changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 86f169df226a..8fea044b3bff 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4076,12 +4076,12 @@ static struct buffer_head *ext4_block_get_zero_range(struct inode *inode,
> return err ? ERR_PTR(err) : NULL;
> }
>
> -static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode,
> - loff_t from, loff_t length, bool *did_zero)
> +static int ext4_block_do_zero_range(struct inode *inode, loff_t from,
> + loff_t length, bool *did_zero,
> + bool *zero_written)
> {
> struct buffer_head *bh;
> struct folio *folio;
> - int err = 0;
>
> bh = ext4_block_get_zero_range(inode, from, length);
> if (IS_ERR_OR_NULL(bh))
> @@ -4092,19 +4092,14 @@ static int ext4_block_do_zero_range(handle_t *handle, struct inode *inode,
> BUFFER_TRACE(bh, "zeroed end of block");
>
> mark_buffer_dirty(bh);
> - /*
> - * Only the written block requires ordered data to prevent exposing
> - * stale data.
> - */
> - if (ext4_should_order_data(inode) &&
> - !buffer_unwritten(bh) && !buffer_delay(bh))
> - err = ext4_jbd2_inode_add_write(handle, inode, from, length);
> - if (!err && did_zero)
> + if (did_zero)
> *did_zero = true;
> + if (zero_written && !buffer_unwritten(bh) && !buffer_delay(bh))
> + *zero_written = true;
>
> folio_unlock(folio);
> folio_put(folio);
> - return err;
> + return 0;
> }
>
> static int ext4_block_journalled_zero_range(handle_t *handle,
> @@ -4148,7 +4143,8 @@ static int ext4_block_journalled_zero_range(handle_t *handle,
> * that corresponds to 'from'
> */
> static int ext4_block_zero_range(handle_t *handle, struct inode *inode,
> - loff_t from, loff_t length, bool *did_zero)
> + loff_t from, loff_t length, bool *did_zero,
> + bool *zero_written)
> {
> unsigned blocksize = inode->i_sb->s_blocksize;
> unsigned int max = blocksize - (from & (blocksize - 1));
> @@ -4167,7 +4163,8 @@ static int ext4_block_zero_range(handle_t *handle, struct inode *inode,
> return ext4_block_journalled_zero_range(handle, inode, from,
> length, did_zero);
> }
> - return ext4_block_do_zero_range(handle, inode, from, length, did_zero);
> + return ext4_block_do_zero_range(inode, from, length, did_zero,
> + zero_written);
> }
>
> /*
> @@ -4184,6 +4181,7 @@ int ext4_block_zero_eof(handle_t *handle, struct inode *inode,
> unsigned int offset;
> loff_t length = end - from;
> bool did_zero = false;
> + bool zero_written = false;
> int err;
>
> offset = from & (blocksize - 1);
> @@ -4196,9 +4194,22 @@ int ext4_block_zero_eof(handle_t *handle, struct inode *inode,
> if (length > blocksize - offset)
> length = blocksize - offset;
>
> - err = ext4_block_zero_range(handle, inode, from, length, &did_zero);
> + err = ext4_block_zero_range(handle, inode, from, length,
> + &did_zero, &zero_written);
> if (err)
> return err;
> + /*
> + * It's necessary to order zeroed data before update i_disksize when
> + * truncating up or performing an append write, because there might be
> + * exposing stale on-disk data which may caused by concurrent post-EOF
> + * mmap write during folio writeback.
> + */
> + if (ext4_should_order_data(inode) &&
> + did_zero && zero_written && !IS_DAX(inode)) {
> + err = ext4_jbd2_inode_add_write(handle, inode, from, length);
> + if (err)
> + return err;
> + }
>
> return did_zero ? length : 0;
> }
> @@ -4222,13 +4233,13 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
> if (start == end &&
> (partial_start || (partial_end != sb->s_blocksize - 1))) {
> err = ext4_block_zero_range(handle, inode, lstart,
> - length, NULL);
> + length, NULL, NULL);
> return err;
> }
> /* Handle partial zero out on the start of the range */
> if (partial_start) {
> err = ext4_block_zero_range(handle, inode, lstart,
> - sb->s_blocksize, NULL);
> + sb->s_blocksize, NULL, NULL);
> if (err)
> return err;
> }
> @@ -4236,7 +4247,7 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
> if (partial_end != sb->s_blocksize - 1)
> err = ext4_block_zero_range(handle, inode,
> byte_end - partial_end,
> - partial_end + 1, NULL);
> + partial_end + 1, NULL, NULL);
> return err;
> }
>
> @@ -4411,17 +4422,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
> end = max_end;
> length = end - offset;
>
> - /*
> - * Attach jinode to inode for jbd2 if we do any zeroing of partial
> - * block.
> - */
> - if (!IS_ALIGNED(offset | end, sb->s_blocksize)) {
> - ret = ext4_inode_attach_jinode(inode);
> - if (ret < 0)
> - return ret;
> - }
> -
> -
> ret = ext4_update_disksize_before_punch(inode, offset, length);
> if (ret)
> return ret;
> --
> 2.52.0
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR