Re: [PATCH 0/3] vmsplice: make vmsplice a trivial wrapper for preadv2/pwritev2
From: Linus Torvalds
Date: Tue Jun 02 2026 - 20:11:56 EST
On Tue, 2 Jun 2026 at 15:54, Askar Safin <safinaskar@xxxxxxxxx> wrote:
>
> Pedro is talking here not about this vmsplice patch, but about
> my future hypothetical patch, which will remove splice-pagecache-to-pipe.
That absolutely would be my suggested next step.
Something like the attached - get rid of filemap_splice_read()
entirely, and just replace it with copy_splice_read().
That also makes the whole O_DIRECT and DAX special case just simply go away.
This is - in case there was any question about it - ENTIRELY untested.
It may not compile.
And if it does compile, it may do unspeakable things to your pets.
So think of this as nothing more than a "something like this". It does
leave "splice_read" around, and it intentionally just does that
#define filemap_splice_read copy_splice_read
to not have to modify all the existing users one by one.
It would be interesting to hear whether there are any real loads
that would ever notice.
Linus
fs/splice.c | 6 --
include/linux/fs.h | 4 +-
mm/filemap.c | 145 ------------------------------------------------
mm/internal.h | 6 --
mm/shmem.c | 159 +----------------------------------------------------
5 files changed, 2 insertions(+), 318 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 9d8f63e2fd1a..37136b9a6612 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -971,12 +971,6 @@ static ssize_t do_splice_read(struct file *in, loff_t *ppos,
if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
- /*
- * O_DIRECT and DAX don't deal with the pagecache, so we allocate a
- * buffer, copy into it and splice that into the pipe.
- */
- if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host))
- return copy_splice_read(in, ppos, pipe, len, flags);
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 11559c513dfb..e623c2804468 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3072,9 +3072,7 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter);
/* fs/splice.c */
-ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len, unsigned int flags);
+#define filemap_splice_read copy_splice_read
ssize_t copy_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index 4e636647100c..c0dbcbb84dba 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2999,151 +2999,6 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
EXPORT_SYMBOL(generic_file_read_iter);
-/*
- * Splice subpages from a folio into a pipe.
- */
-size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
- struct folio *folio, loff_t fpos, size_t size)
-{
- struct page *page;
- size_t spliced = 0, offset = offset_in_folio(folio, fpos);
-
- page = folio_page(folio, offset / PAGE_SIZE);
- size = min(size, folio_size(folio) - offset);
- offset %= PAGE_SIZE;
-
- while (spliced < size && !pipe_is_full(pipe)) {
- struct pipe_buffer *buf = pipe_head_buf(pipe);
- size_t part = min_t(size_t, PAGE_SIZE - offset, size - spliced);
-
- *buf = (struct pipe_buffer) {
- .ops = &page_cache_pipe_buf_ops,
- .page = page,
- .offset = offset,
- .len = part,
- };
- folio_get(folio);
- pipe->head++;
- page++;
- spliced += part;
- offset = 0;
- }
-
- return spliced;
-}
-
-/**
- * filemap_splice_read - Splice data from a file's pagecache into a pipe
- * @in: The file to read from
- * @ppos: Pointer to the file position to read from
- * @pipe: The pipe to splice into
- * @len: The amount to splice
- * @flags: The SPLICE_F_* flags
- *
- * This function gets folios from a file's pagecache and splices them into the
- * pipe. Readahead will be called as necessary to fill more folios. This may
- * be used for blockdevs also.
- *
- * Return: On success, the number of bytes read will be returned and *@ppos
- * will be updated if appropriate; 0 will be returned if there is no more data
- * to be read; -EAGAIN will be returned if the pipe had no space, and some
- * other negative error code will be returned on error. A short read may occur
- * if the pipe has insufficient space, we reach the end of the data or we hit a
- * hole.
- */
-ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len, unsigned int flags)
-{
- struct folio_batch fbatch;
- struct kiocb iocb;
- size_t total_spliced = 0, used, npages;
- loff_t isize, end_offset;
- bool writably_mapped;
- int i, error = 0;
-
- if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes))
- return 0;
-
- init_sync_kiocb(&iocb, in);
- iocb.ki_pos = *ppos;
-
- /* Work out how much data we can actually add into the pipe */
- used = pipe_buf_usage(pipe);
- npages = max_t(ssize_t, pipe->max_usage - used, 0);
- len = min_t(size_t, len, npages * PAGE_SIZE);
-
- folio_batch_init(&fbatch);
-
- do {
- cond_resched();
-
- if (*ppos >= i_size_read(in->f_mapping->host))
- break;
-
- iocb.ki_pos = *ppos;
- error = filemap_get_pages(&iocb, len, &fbatch, true);
- if (error < 0)
- break;
-
- /*
- * i_size must be checked after we know the pages are Uptodate.
- *
- * Checking i_size after the check allows us to calculate
- * the correct value for "nr", which means the zero-filled
- * part of the page is not copied back to userspace (unless
- * another truncate extends the file - this is desired though).
- */
- isize = i_size_read(in->f_mapping->host);
- if (unlikely(*ppos >= isize))
- break;
- end_offset = min_t(loff_t, isize, *ppos + len);
-
- /*
- * Once we start copying data, we don't want to be touching any
- * cachelines that might be contended:
- */
- writably_mapped = mapping_writably_mapped(in->f_mapping);
-
- for (i = 0; i < folio_batch_count(&fbatch); i++) {
- struct folio *folio = fbatch.folios[i];
- size_t n;
-
- if (folio_pos(folio) >= end_offset)
- goto out;
- folio_mark_accessed(folio);
-
- /*
- * If users can be writing to this folio using arbitrary
- * virtual addresses, take care of potential aliasing
- * before reading the folio on the kernel side.
- */
- if (writably_mapped)
- flush_dcache_folio(folio);
-
- n = min_t(loff_t, len, isize - *ppos);
- n = splice_folio_into_pipe(pipe, folio, *ppos, n);
- if (!n)
- goto out;
- len -= n;
- total_spliced += n;
- *ppos += n;
- in->f_ra.prev_pos = *ppos;
- if (pipe_is_full(pipe))
- goto out;
- }
-
- folio_batch_release(&fbatch);
- } while (len);
-
-out:
- folio_batch_release(&fbatch);
- file_accessed(in);
-
- return total_spliced ? total_spliced : error;
-}
-EXPORT_SYMBOL(filemap_splice_read);
-
static inline loff_t folio_seek_hole_data(struct xa_state *xas,
struct address_space *mapping, struct folio *folio,
loff_t start, loff_t end, bool seek_data)
diff --git a/mm/internal.h b/mm/internal.h
index 5a2ddcf68e0b..c0ca0df5ac7e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1521,12 +1521,6 @@ struct migration_target_control {
enum migrate_reason reason;
};
-/*
- * mm/filemap.c
- */
-size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
- struct folio *folio, loff_t fpos, size_t size);
-
/*
* mm/vmalloc.c
*/
diff --git a/mm/shmem.c b/mm/shmem.c
index 3b5dc21b323c..92138b7277b5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3481,163 +3481,6 @@ static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ret;
}
-static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return true;
-}
-
-static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
-}
-
-static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return false;
-}
-
-static const struct pipe_buf_operations zero_pipe_buf_ops = {
- .release = zero_pipe_buf_release,
- .try_steal = zero_pipe_buf_try_steal,
- .get = zero_pipe_buf_get,
-};
-
-static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
- loff_t fpos, size_t size)
-{
- size_t offset = fpos & ~PAGE_MASK;
-
- size = min_t(size_t, size, PAGE_SIZE - offset);
-
- if (!pipe_is_full(pipe)) {
- struct pipe_buffer *buf = pipe_head_buf(pipe);
-
- *buf = (struct pipe_buffer) {
- .ops = &zero_pipe_buf_ops,
- .page = ZERO_PAGE(0),
- .offset = offset,
- .len = size,
- };
- pipe->head++;
- }
-
- return size;
-}
-
-static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t len, unsigned int flags)
-{
- struct inode *inode = file_inode(in);
- struct address_space *mapping = inode->i_mapping;
- struct folio *folio = NULL;
- size_t total_spliced = 0, used, npages, n, part;
- loff_t isize;
- int error = 0;
-
- /* Work out how much data we can actually add into the pipe */
- used = pipe_buf_usage(pipe);
- npages = max_t(ssize_t, pipe->max_usage - used, 0);
- len = min_t(size_t, len, npages * PAGE_SIZE);
-
- do {
- bool fallback_page_splice = false;
- struct page *page = NULL;
- pgoff_t index;
- size_t size;
-
- if (*ppos >= i_size_read(inode))
- break;
-
- index = *ppos >> PAGE_SHIFT;
- error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
- if (error) {
- if (error == -EINVAL)
- error = 0;
- break;
- }
- if (folio) {
- folio_unlock(folio);
-
- page = folio_file_page(folio, index);
- if (PageHWPoison(page)) {
- error = -EIO;
- break;
- }
-
- if (folio_test_large(folio) &&
- folio_test_has_hwpoisoned(folio))
- fallback_page_splice = true;
- }
-
- /*
- * i_size must be checked after we know the pages are Uptodate.
- *
- * Checking i_size after the check allows us to calculate
- * the correct value for "nr", which means the zero-filled
- * part of the page is not copied back to userspace (unless
- * another truncate extends the file - this is desired though).
- */
- isize = i_size_read(inode);
- if (unlikely(*ppos >= isize))
- break;
- /*
- * Fallback to PAGE_SIZE splice if the large folio has hwpoisoned
- * pages.
- */
- size = len;
- if (unlikely(fallback_page_splice)) {
- size_t offset = *ppos & ~PAGE_MASK;
-
- size = umin(size, PAGE_SIZE - offset);
- }
- part = min_t(loff_t, isize - *ppos, size);
-
- if (folio) {
- /*
- * If users can be writing to this page using arbitrary
- * virtual addresses, take care about potential aliasing
- * before reading the page on the kernel side.
- */
- if (mapping_writably_mapped(mapping)) {
- if (likely(!fallback_page_splice))
- flush_dcache_folio(folio);
- else
- flush_dcache_page(page);
- }
- folio_mark_accessed(folio);
- /*
- * Ok, we have the page, and it's up-to-date, so we can
- * now splice it into the pipe.
- */
- n = splice_folio_into_pipe(pipe, folio, *ppos, part);
- folio_put(folio);
- folio = NULL;
- } else {
- n = splice_zeropage_into_pipe(pipe, *ppos, part);
- }
-
- if (!n)
- break;
- len -= n;
- total_spliced += n;
- *ppos += n;
- in->f_ra.prev_pos = *ppos;
- if (pipe_is_full(pipe))
- break;
-
- cond_resched();
- } while (len);
-
- if (folio)
- folio_put(folio);
-
- file_accessed(in);
- return total_spliced ? total_spliced : error;
-}
-
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
struct address_space *mapping = file->f_mapping;
@@ -5223,7 +5066,7 @@ static const struct file_operations shmem_file_operations = {
.read_iter = shmem_file_read_iter,
.write_iter = shmem_file_write_iter,
.fsync = noop_fsync,
- .splice_read = shmem_file_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = shmem_fallocate,
.setlease = generic_setlease,