Merge branch 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Oct 2016 22:36:58 +0000 (15:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Oct 2016 22:36:58 +0000 (15:36 -0700)
Pull VFS splice updates from Al Viro:
 "There's a bunch of branches this cycle, both mine and from other folks
  and I'd rather send pull requests separately.

  This one is the conversion of ->splice_read() to ITER_PIPE iov_iter
  (and introduction of such). Gets rid of a lot of code in fs/splice.c
  and elsewhere; there will be followups, but these are for the next
  cycle...  Some pipe/splice-related cleanups from Miklos in the same
  branch as well"

* 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  pipe: fix comment in pipe_buf_operations
  pipe: add pipe_buf_steal() helper
  pipe: add pipe_buf_confirm() helper
  pipe: add pipe_buf_release() helper
  pipe: add pipe_buf_get() helper
  relay: simplify relay_file_read()
  switch default_file_splice_read() to use of pipe-backed iov_iter
  switch generic_file_splice_read() to use of ->read_iter()
  new iov_iter flavour: pipe-backed
  fuse_dev_splice_read(): switch to add_to_pipe()
  skb_splice_bits(): get rid of callback
  new helper: add_to_pipe()
  splice: lift pipe_lock out of splice_to_pipe()
  splice: switch get_iovec_page_array() to iov_iter
  splice_to_pipe(): don't open-code wakeup_pipe_readers()
  consistent treatment of EFAULT on O_DIRECT read/write

30 files changed:
drivers/char/virtio_console.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/vvp_internal.h
drivers/staging/lustre/lustre/llite/vvp_io.c
fs/coda/file.c
fs/direct-io.c
fs/fuse/dev.c
fs/gfs2/file.c
fs/nfs/file.c
fs/nfs/internal.h
fs/nfs/nfs4file.c
fs/ocfs2/file.c
fs/ocfs2/ocfs2_trace.h
fs/pipe.c
fs/splice.c
fs/xfs/xfs_file.c
fs/xfs/xfs_trace.h
include/linux/fs.h
include/linux/pipe_fs_i.h
include/linux/skbuff.h
include/linux/splice.h
include/linux/uio.h
kernel/relay.c
lib/iov_iter.c
mm/shmem.c
net/core/skbuff.c
net/ipv4/tcp.c
net/kcm/kcmsock.c
net/unix/af_unix.c

index 5da47e2..8114744 100644 (file)
@@ -889,7 +889,7 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                return 0;
 
        /* Try lock this page */
-       if (buf->ops->steal(pipe, buf) == 0) {
+       if (pipe_buf_steal(pipe, buf) == 0) {
                /* Get reference and unlock page for moving */
                get_page(buf->page);
                unlock_page(buf->page);
index 6e3a188..d56863f 100644 (file)
@@ -1138,45 +1138,31 @@ restart:
                        range_lock_init(&range, *ppos, *ppos + count - 1);
 
                vio->vui_fd  = LUSTRE_FPRIVATE(file);
-               vio->vui_io_subtype = args->via_io_subtype;
+               vio->vui_iter = args->u.normal.via_iter;
+               vio->vui_iocb = args->u.normal.via_iocb;
+               /*
+                * Direct IO reads must also take range lock,
+                * or multiple reads will try to work on the same pages
+                * See LU-6227 for details.
+                */
+               if (((iot == CIT_WRITE) ||
+                    (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
+                   !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+                       CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
+                              range.rl_node.in_extent.start,
+                              range.rl_node.in_extent.end);
+                       result = range_lock(&lli->lli_write_tree,
+                                           &range);
+                       if (result < 0)
+                               goto out;
 
-               switch (vio->vui_io_subtype) {
-               case IO_NORMAL:
-                       vio->vui_iter = args->u.normal.via_iter;
-                       vio->vui_iocb = args->u.normal.via_iocb;
-                       /*
-                        * Direct IO reads must also take range lock,
-                        * or multiple reads will try to work on the same pages
-                        * See LU-6227 for details.
-                        */
-                       if (((iot == CIT_WRITE) ||
-                            (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
-                           !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-                               CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
-                                      range.rl_node.in_extent.start,
-                                      range.rl_node.in_extent.end);
-                               result = range_lock(&lli->lli_write_tree,
-                                                   &range);
-                               if (result < 0)
-                                       goto out;
-
-                               range_locked = true;
-                       }
-                       down_read(&lli->lli_trunc_sem);
-                       break;
-               case IO_SPLICE:
-                       vio->u.splice.vui_pipe = args->u.splice.via_pipe;
-                       vio->u.splice.vui_flags = args->u.splice.via_flags;
-                       break;
-               default:
-                       CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
-                       LBUG();
+                       range_locked = true;
                }
+               down_read(&lli->lli_trunc_sem);
                ll_cl_add(file, env, io);
                result = cl_io_loop(env, io);
                ll_cl_remove(file, env);
-               if (args->via_io_subtype == IO_NORMAL)
-                       up_read(&lli->lli_trunc_sem);
+               up_read(&lli->lli_trunc_sem);
                if (range_locked) {
                        CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
                               range.rl_node.in_extent.start,
@@ -1235,7 +1221,7 @@ static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (IS_ERR(env))
                return PTR_ERR(env);
 
-       args = ll_env_args(env, IO_NORMAL);
+       args = ll_env_args(env);
        args->u.normal.via_iter = to;
        args->u.normal.via_iocb = iocb;
 
@@ -1259,7 +1245,7 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (IS_ERR(env))
                return PTR_ERR(env);
 
-       args = ll_env_args(env, IO_NORMAL);
+       args = ll_env_args(env);
        args->u.normal.via_iter = from;
        args->u.normal.via_iocb = iocb;
 
@@ -1269,31 +1255,6 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        return result;
 }
 
-/*
- * Send file content (through pagecache) somewhere with helper
- */
-static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
-                                  struct pipe_inode_info *pipe, size_t count,
-                                  unsigned int flags)
-{
-       struct lu_env      *env;
-       struct vvp_io_args *args;
-       ssize_t      result;
-       int              refcheck;
-
-       env = cl_env_get(&refcheck);
-       if (IS_ERR(env))
-               return PTR_ERR(env);
-
-       args = ll_env_args(env, IO_SPLICE);
-       args->u.splice.via_pipe = pipe;
-       args->u.splice.via_flags = flags;
-
-       result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
-       cl_env_put(env, &refcheck);
-       return result;
-}
-
 int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
                             __u64 flags, struct lov_user_md *lum,
                             int lum_size)
@@ -3267,7 +3228,7 @@ struct file_operations ll_file_operations = {
        .release        = ll_file_release,
        .mmap      = ll_file_mmap,
        .llseek  = ll_file_seek,
-       .splice_read    = ll_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .fsync    = ll_fsync,
        .flush    = ll_flush
 };
@@ -3280,7 +3241,7 @@ struct file_operations ll_file_operations_flock = {
        .release        = ll_file_release,
        .mmap      = ll_file_mmap,
        .llseek  = ll_file_seek,
-       .splice_read    = ll_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .fsync    = ll_fsync,
        .flush    = ll_flush,
        .flock    = ll_file_flock,
@@ -3296,7 +3257,7 @@ struct file_operations ll_file_operations_noflock = {
        .release        = ll_file_release,
        .mmap      = ll_file_mmap,
        .llseek  = ll_file_seek,
-       .splice_read    = ll_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .fsync    = ll_fsync,
        .flush    = ll_flush,
        .flock    = ll_file_noflock,
index 3e98bd6..4bc5512 100644 (file)
@@ -908,17 +908,11 @@ void vvp_write_complete(struct vvp_object *club, struct vvp_page *page);
  */
 struct vvp_io_args {
        /** normal/splice */
-       enum vvp_io_subtype via_io_subtype;
-
        union {
                struct {
                        struct kiocb      *via_iocb;
                        struct iov_iter   *via_iter;
                } normal;
-               struct {
-                       struct pipe_inode_info  *via_pipe;
-                       unsigned int       via_flags;
-               } splice;
        } u;
 };
 
@@ -946,14 +940,9 @@ static inline struct ll_thread_info *ll_env_info(const struct lu_env *env)
        return lti;
 }
 
-static inline struct vvp_io_args *ll_env_args(const struct lu_env *env,
-                                             enum vvp_io_subtype type)
+static inline struct vvp_io_args *ll_env_args(const struct lu_env *env)
 {
-       struct vvp_io_args *via = &ll_env_info(env)->lti_args;
-
-       via->via_io_subtype = type;
-
-       return via;
+       return &ll_env_info(env)->lti_args;
 }
 
 void ll_queue_done_writing(struct inode *inode, unsigned long flags);
index 5802da8..4464ad2 100644 (file)
@@ -49,14 +49,6 @@ struct obd_device;
 struct obd_export;
 struct page;
 
-/* specific architecture can implement only part of this list */
-enum vvp_io_subtype {
-       /** normal IO */
-       IO_NORMAL,
-       /** io started from splice_{read|write} */
-       IO_SPLICE
-};
-
 /**
  * IO state private to IO state private to VVP layer.
  */
@@ -98,10 +90,6 @@ struct vvp_io {
                         */
                        bool            ft_flags_valid;
                } fault;
-               struct {
-                       struct pipe_inode_info  *vui_pipe;
-                       unsigned int             vui_flags;
-               } splice;
                struct {
                        struct cl_page_list vui_queue;
                        unsigned long vui_written;
@@ -110,8 +98,6 @@ struct vvp_io {
                } write;
        } u;
 
-       enum vvp_io_subtype     vui_io_subtype;
-
        /**
         * Layout version when this IO is initialized
         */
index 2ab4503..2b7f182 100644 (file)
@@ -53,18 +53,6 @@ static struct vvp_io *cl2vvp_io(const struct lu_env *env,
        return vio;
 }
 
-/**
- * True, if \a io is a normal io, False for splice_{read,write}
- */
-static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
-{
-       struct vvp_io *vio = vvp_env_io(env);
-
-       LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
-
-       return vio->vui_io_subtype == IO_NORMAL;
-}
-
 /**
  * For swapping layout. The file's layout may have changed.
  * To avoid populating pages to a wrong stripe, we have to verify the
@@ -390,9 +378,6 @@ static int vvp_mmap_locks(const struct lu_env *env,
 
        LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
-       if (!cl_is_normalio(env, io))
-               return 0;
-
        if (!vio->vui_iter) /* nfs or loop back device write */
                return 0;
 
@@ -461,15 +446,10 @@ static void vvp_io_advance(const struct lu_env *env,
                           const struct cl_io_slice *ios,
                           size_t nob)
 {
-       struct vvp_io    *vio = cl2vvp_io(env, ios);
-       struct cl_io     *io  = ios->cis_io;
        struct cl_object *obj = ios->cis_io->ci_obj;
-
+       struct vvp_io    *vio = cl2vvp_io(env, ios);
        CLOBINVRNT(env, obj, vvp_object_invariant(obj));
 
-       if (!cl_is_normalio(env, io))
-               return;
-
        iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count  -= nob);
 }
 
@@ -478,7 +458,7 @@ static void vvp_io_update_iov(const struct lu_env *env,
 {
        size_t size = io->u.ci_rw.crw_count;
 
-       if (!cl_is_normalio(env, io) || !vio->vui_iter)
+       if (!vio->vui_iter)
                return;
 
        iov_iter_truncate(vio->vui_iter, size);
@@ -715,25 +695,8 @@ static int vvp_io_read_start(const struct lu_env *env,
 
        /* BUG: 5972 */
        file_accessed(file);
-       switch (vio->vui_io_subtype) {
-       case IO_NORMAL:
-               LASSERT(vio->vui_iocb->ki_pos == pos);
-               result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
-               break;
-       case IO_SPLICE:
-               result = generic_file_splice_read(file, &pos,
-                                                 vio->u.splice.vui_pipe, cnt,
-                                                 vio->u.splice.vui_flags);
-               /* LU-1109: do splice read stripe by stripe otherwise if it
-                * may make nfsd stuck if this read occupied all internal pipe
-                * buffers.
-                */
-               io->ci_continue = 0;
-               break;
-       default:
-               CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
-               LBUG();
-       }
+       LASSERT(vio->vui_iocb->ki_pos == pos);
+       result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
 
 out:
        if (result >= 0) {
index f47c748..8415d4f 100644 (file)
@@ -37,27 +37,6 @@ coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
 }
 
-static ssize_t
-coda_file_splice_read(struct file *coda_file, loff_t *ppos,
-                     struct pipe_inode_info *pipe, size_t count,
-                     unsigned int flags)
-{
-       ssize_t (*splice_read)(struct file *, loff_t *,
-                              struct pipe_inode_info *, size_t, unsigned int);
-       struct coda_file_info *cfi;
-       struct file *host_file;
-
-       cfi = CODA_FTOC(coda_file);
-       BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
-       host_file = cfi->cfi_container;
-
-       splice_read = host_file->f_op->splice_read;
-       if (!splice_read)
-               splice_read = default_file_splice_read;
-
-       return splice_read(host_file, ppos, pipe, count, flags);
-}
-
 static ssize_t
 coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -225,6 +204,6 @@ const struct file_operations coda_file_operations = {
        .open           = coda_open,
        .release        = coda_release,
        .fsync          = coda_fsync,
-       .splice_read    = coda_file_splice_read,
+       .splice_read    = generic_file_splice_read,
 };
 
index 7c3ce73..fb9aa16 100644 (file)
@@ -246,6 +246,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
                if ((dio->op == REQ_OP_READ) &&
                    ((offset + transferred) > dio->i_size))
                        transferred = dio->i_size - offset;
+               /* ignore EFAULT if some IO has been done */
+               if (unlikely(ret == -EFAULT) && transferred)
+                       ret = 0;
        }
 
        if (ret == 0)
index c41bde2..70ea57c 100644 (file)
@@ -728,7 +728,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                struct pipe_buffer *buf = cs->pipebufs;
 
                if (!cs->write) {
-                       err = buf->ops->confirm(cs->pipe, buf);
+                       err = pipe_buf_confirm(cs->pipe, buf);
                        if (err)
                                return err;
 
@@ -827,7 +827,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 
        fuse_copy_finish(cs);
 
-       err = buf->ops->confirm(cs->pipe, buf);
+       err = pipe_buf_confirm(cs->pipe, buf);
        if (err)
                return err;
 
@@ -840,7 +840,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
        if (cs->len != PAGE_SIZE)
                goto out_fallback;
 
-       if (buf->ops->steal(cs->pipe, buf) != 0)
+       if (pipe_buf_steal(cs->pipe, buf) != 0)
                goto out_fallback;
 
        newpage = buf->page;
@@ -1341,9 +1341,8 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
                                    struct pipe_inode_info *pipe,
                                    size_t len, unsigned int flags)
 {
-       int ret;
+       int total, ret;
        int page_nr = 0;
-       int do_wakeup = 0;
        struct pipe_buffer *bufs;
        struct fuse_copy_state cs;
        struct fuse_dev *fud = fuse_get_dev(in);
@@ -1362,52 +1361,23 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
        if (ret < 0)
                goto out;
 
-       ret = 0;
-       pipe_lock(pipe);
-
-       if (!pipe->readers) {
-               send_sig(SIGPIPE, current, 0);
-               if (!ret)
-                       ret = -EPIPE;
-               goto out_unlock;
-       }
-
        if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
                ret = -EIO;
-               goto out_unlock;
+               goto out;
        }
 
-       while (page_nr < cs.nr_segs) {
-               int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-               struct pipe_buffer *buf = pipe->bufs + newbuf;
-
-               buf->page = bufs[page_nr].page;
-               buf->offset = bufs[page_nr].offset;
-               buf->len = bufs[page_nr].len;
+       for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
                /*
                 * Need to be careful about this.  Having buf->ops in module
                 * code can Oops if the buffer persists after module unload.
                 */
-               buf->ops = &nosteal_pipe_buf_ops;
-
-               pipe->nrbufs++;
-               page_nr++;
-               ret += buf->len;
-
-               if (pipe->files)
-                       do_wakeup = 1;
-       }
-
-out_unlock:
-       pipe_unlock(pipe);
-
-       if (do_wakeup) {
-               smp_mb();
-               if (waitqueue_active(&pipe->wait))
-                       wake_up_interruptible(&pipe->wait);
-               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+               bufs[page_nr].ops = &nosteal_pipe_buf_ops;
+               ret = add_to_pipe(pipe, &bufs[page_nr++]);
+               if (unlikely(ret < 0))
+                       break;
        }
-
+       if (total)
+               ret = total;
 out:
        for (; page_nr < cs.nr_segs; page_nr++)
                put_page(bufs[page_nr].page);
@@ -1992,7 +1962,7 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
                        pipe->nrbufs--;
                } else {
-                       ibuf->ops->get(pipe, ibuf);
+                       pipe_buf_get(pipe, ibuf);
                        *obuf = *ibuf;
                        obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
                        obuf->len = rem;
@@ -2014,10 +1984,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 
        ret = fuse_dev_do_write(fud, &cs, len);
 
-       for (idx = 0; idx < nbuf; idx++) {
-               struct pipe_buffer *buf = &bufs[idx];
-               buf->ops->release(pipe, buf);
-       }
+       for (idx = 0; idx < nbuf; idx++)
+               pipe_buf_release(pipe, &bufs[idx]);
+
 out:
        kfree(bufs);
        return ret;
index 360188f..e23ff70 100644 (file)
@@ -954,30 +954,6 @@ out_uninit:
        return ret;
 }
 
-static ssize_t gfs2_file_splice_read(struct file *in, loff_t *ppos,
-                                    struct pipe_inode_info *pipe, size_t len,
-                                    unsigned int flags)
-{
-       struct inode *inode = in->f_mapping->host;
-       struct gfs2_inode *ip = GFS2_I(inode);
-       struct gfs2_holder gh;
-       int ret;
-
-       inode_lock(inode);
-
-       ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
-       if (ret) {
-               inode_unlock(inode);
-               return ret;
-       }
-
-       gfs2_glock_dq_uninit(&gh);
-       inode_unlock(inode);
-
-       return generic_file_splice_read(in, ppos, pipe, len, flags);
-}
-
-
 static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
                                      struct file *out, loff_t *ppos,
                                      size_t len, unsigned int flags)
@@ -1140,7 +1116,7 @@ const struct file_operations gfs2_file_fops = {
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
-       .splice_read    = gfs2_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = simple_nosetlease,
        .fallocate      = gfs2_fallocate,
@@ -1168,7 +1144,7 @@ const struct file_operations gfs2_file_fops_nolock = {
        .open           = gfs2_open,
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
-       .splice_read    = gfs2_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
index ca699dd..2efbdde 100644 (file)
@@ -182,29 +182,6 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
 }
 EXPORT_SYMBOL_GPL(nfs_file_read);
 
-ssize_t
-nfs_file_splice_read(struct file *filp, loff_t *ppos,
-                    struct pipe_inode_info *pipe, size_t count,
-                    unsigned int flags)
-{
-       struct inode *inode = file_inode(filp);
-       ssize_t res;
-
-       dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
-               filp, (unsigned long) count, (unsigned long long) *ppos);
-
-       nfs_start_io_read(inode);
-       res = nfs_revalidate_mapping(inode, filp->f_mapping);
-       if (!res) {
-               res = generic_file_splice_read(filp, ppos, pipe, count, flags);
-               if (res > 0)
-                       nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res);
-       }
-       nfs_end_io_read(inode);
-       return res;
-}
-EXPORT_SYMBOL_GPL(nfs_file_splice_read);
-
 int
 nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
@@ -871,7 +848,7 @@ const struct file_operations nfs_file_operations = {
        .fsync          = nfs_file_fsync,
        .lock           = nfs_lock,
        .flock          = nfs_flock,
-       .splice_read    = nfs_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .check_flags    = nfs_check_flags,
        .setlease       = simple_nosetlease,
index 74935a1..d7b062b 100644 (file)
@@ -365,8 +365,6 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *)
 int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
 loff_t nfs_file_llseek(struct file *, loff_t, int);
 ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
-ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
-                            size_t, unsigned int);
 int nfs_file_mmap(struct file *, struct vm_area_struct *);
 ssize_t nfs_file_write(struct kiocb *, struct iov_iter *);
 int nfs_file_release(struct inode *, struct file *);
index d085ad7..89a7795 100644 (file)
@@ -248,7 +248,7 @@ const struct file_operations nfs4_file_operations = {
        .fsync          = nfs_file_fsync,
        .lock           = nfs_lock,
        .flock          = nfs_flock,
-       .splice_read    = nfs_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .check_flags    = nfs_check_flags,
        .setlease       = simple_nosetlease,
index 0b055bf..8f91639 100644 (file)
@@ -2321,36 +2321,6 @@ out_mutex:
        return ret;
 }
 
-static ssize_t ocfs2_file_splice_read(struct file *in,
-                                     loff_t *ppos,
-                                     struct pipe_inode_info *pipe,
-                                     size_t len,
-                                     unsigned int flags)
-{
-       int ret = 0, lock_level = 0;
-       struct inode *inode = file_inode(in);
-
-       trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
-                       (unsigned long long)OCFS2_I(inode)->ip_blkno,
-                       in->f_path.dentry->d_name.len,
-                       in->f_path.dentry->d_name.name, len);
-
-       /*
-        * See the comment in ocfs2_file_read_iter()
-        */
-       ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto bail;
-       }
-       ocfs2_inode_unlock(inode, lock_level);
-
-       ret = generic_file_splice_read(in, ppos, pipe, len, flags);
-
-bail:
-       return ret;
-}
-
 static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
                                   struct iov_iter *to)
 {
@@ -2509,7 +2479,7 @@ const struct file_operations ocfs2_fops = {
 #endif
        .lock           = ocfs2_lock,
        .flock          = ocfs2_flock,
-       .splice_read    = ocfs2_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
 };
@@ -2554,7 +2524,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
        .compat_ioctl   = ocfs2_compat_ioctl,
 #endif
        .flock          = ocfs2_flock,
-       .splice_read    = ocfs2_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
 };
index f8f5fc5..0b58abc 100644 (file)
@@ -1314,8 +1314,6 @@ DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_write);
 
 DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_write);
 
-DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read);
-
 DEFINE_OCFS2_FILE_OPS(ocfs2_file_aio_read);
 
 DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file);
index 4ebe6b2..4fc422f 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -267,7 +267,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                if (bufs) {
                        int curbuf = pipe->curbuf;
                        struct pipe_buffer *buf = pipe->bufs + curbuf;
-                       const struct pipe_buf_operations *ops = buf->ops;
                        size_t chars = buf->len;
                        size_t written;
                        int error;
@@ -275,7 +274,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                        if (chars > total_len)
                                chars = total_len;
 
-                       error = ops->confirm(pipe, buf);
+                       error = pipe_buf_confirm(pipe, buf);
                        if (error) {
                                if (!ret)
                                        ret = error;
@@ -299,8 +298,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
                        }
 
                        if (!buf->len) {
-                               buf->ops = NULL;
-                               ops->release(pipe, buf);
+                               pipe_buf_release(pipe, buf);
                                curbuf = (curbuf + 1) & (pipe->buffers - 1);
                                pipe->curbuf = curbuf;
                                pipe->nrbufs = --bufs;
@@ -383,11 +381,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
                                                        (pipe->buffers - 1);
                struct pipe_buffer *buf = pipe->bufs + lastbuf;
-               const struct pipe_buf_operations *ops = buf->ops;
                int offset = buf->offset + buf->len;
 
-               if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-                       ret = ops->confirm(pipe, buf);
+               if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
+                       ret = pipe_buf_confirm(pipe, buf);
                        if (ret)
                                goto out;
 
@@ -664,7 +661,7 @@ void free_pipe_info(struct pipe_inode_info *pipe)
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
                if (buf->ops)
-                       buf->ops->release(pipe, buf);
+                       pipe_buf_release(pipe, buf);
        }
        if (pipe->tmp_page)
                __free_page(pipe->tmp_page);
index dd9bf7e..aa38901 100644 (file)
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                       struct splice_pipe_desc *spd)
 {
        unsigned int spd_pages = spd->nr_pages;
-       int ret, do_wakeup, page_nr;
+       int ret = 0, page_nr = 0;
 
        if (!spd_pages)
                return 0;
 
-       ret = 0;
-       do_wakeup = 0;
-       page_nr = 0;
-
-       pipe_lock(pipe);
-
-       for (;;) {
-               if (!pipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
-                       if (!ret)
-                               ret = -EPIPE;
-                       break;
-               }
-
-               if (pipe->nrbufs < pipe->buffers) {
-                       int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-                       struct pipe_buffer *buf = pipe->bufs + newbuf;
-
-                       buf->page = spd->pages[page_nr];
-                       buf->offset = spd->partial[page_nr].offset;
-                       buf->len = spd->partial[page_nr].len;
-                       buf->private = spd->partial[page_nr].private;
-                       buf->ops = spd->ops;
-                       if (spd->flags & SPLICE_F_GIFT)
-                               buf->flags |= PIPE_BUF_FLAG_GIFT;
-
-                       pipe->nrbufs++;
-                       page_nr++;
-                       ret += buf->len;
-
-                       if (pipe->files)
-                               do_wakeup = 1;
+       if (unlikely(!pipe->readers)) {
+               send_sig(SIGPIPE, current, 0);
+               ret = -EPIPE;
+               goto out;
+       }
 
-                       if (!--spd->nr_pages)
-                               break;
-                       if (pipe->nrbufs < pipe->buffers)
-                               continue;
+       while (pipe->nrbufs < pipe->buffers) {
+               int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+               struct pipe_buffer *buf = pipe->bufs + newbuf;
 
-                       break;
-               }
+               buf->page = spd->pages[page_nr];
+               buf->offset = spd->partial[page_nr].offset;
+               buf->len = spd->partial[page_nr].len;
+               buf->private = spd->partial[page_nr].private;
+               buf->ops = spd->ops;
 
-               if (spd->flags & SPLICE_F_NONBLOCK) {
-                       if (!ret)
-                               ret = -EAGAIN;
-                       break;
-               }
+               pipe->nrbufs++;
+               page_nr++;
+               ret += buf->len;
 
-               if (signal_pending(current)) {
-                       if (!ret)
-                               ret = -ERESTARTSYS;
+               if (!--spd->nr_pages)
                        break;
-               }
-
-               if (do_wakeup) {
-                       smp_mb();
-                       if (waitqueue_active(&pipe->wait))
-                               wake_up_interruptible_sync(&pipe->wait);
-                       kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-                       do_wakeup = 0;
-               }
-
-               pipe->waiting_writers++;
-               pipe_wait(pipe);
-               pipe->waiting_writers--;
        }
 
-       pipe_unlock(pipe);
-
-       if (do_wakeup)
-               wakeup_pipe_readers(pipe);
+       if (!ret)
+               ret = -EAGAIN;
 
+out:
        while (page_nr < spd_pages)
                spd->spd_release(spd, page_nr++);
 
@@ -266,6 +223,26 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL_GPL(splice_to_pipe);
 
+ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+{
+       int ret;
+
+       if (unlikely(!pipe->readers)) {
+               send_sig(SIGPIPE, current, 0);
+               ret = -EPIPE;
+       } else if (pipe->nrbufs == pipe->buffers) {
+               ret = -EAGAIN;
+       } else {
+               int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+               pipe->bufs[newbuf] = *buf;
+               pipe->nrbufs++;
+               return buf->len;
+       }
+       pipe_buf_release(pipe, buf);
+       return ret;
+}
+EXPORT_SYMBOL(add_to_pipe);
+
 void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 {
        put_page(spd->pages[i]);
@@ -303,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
        kfree(spd->partial);
 }
 
-static int
-__generic_file_splice_read(struct file *in, loff_t *ppos,
-                          struct pipe_inode_info *pipe, size_t len,
-                          unsigned int flags)
-{
-       struct address_space *mapping = in->f_mapping;
-       unsigned int loff, nr_pages, req_pages;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct page *page;
-       pgoff_t index, end_index;
-       loff_t isize;
-       int error, page_nr;
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &page_cache_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
-
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
-
-       index = *ppos >> PAGE_SHIFT;
-       loff = *ppos & ~PAGE_MASK;
-       req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       nr_pages = min(req_pages, spd.nr_pages_max);
-
-       /*
-        * Lookup the (hopefully) full range of pages we need.
-        */
-       spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
-       index += spd.nr_pages;
-
-       /*
-        * If find_get_pages_contig() returned fewer pages than we needed,
-        * readahead/allocate the rest and fill in the holes.
-        */
-       if (spd.nr_pages < nr_pages)
-               page_cache_sync_readahead(mapping, &in->f_ra, in,
-                               index, req_pages - spd.nr_pages);
-
-       error = 0;
-       while (spd.nr_pages < nr_pages) {
-               /*
-                * Page could be there, find_get_pages_contig() breaks on
-                * the first hole.
-                */
-               page = find_get_page(mapping, index);
-               if (!page) {
-                       /*
-                        * page didn't exist, allocate one.
-                        */
-                       page = page_cache_alloc_cold(mapping);
-                       if (!page)
-                               break;
-
-                       error = add_to_page_cache_lru(page, mapping, index,
-                                  mapping_gfp_constraint(mapping, GFP_KERNEL));
-                       if (unlikely(error)) {
-                               put_page(page);
-                               if (error == -EEXIST)
-                                       continue;
-                               break;
-                       }
-                       /*
-                        * add_to_page_cache() locks the page, unlock it
-                        * to avoid convoluting the logic below even more.
-                        */
-                       unlock_page(page);
-               }
-
-               spd.pages[spd.nr_pages++] = page;
-               index++;
-       }
-
-       /*
-        * Now loop over the map and see if we need to start IO on any
-        * pages, fill in the partial map, etc.
-        */
-       index = *ppos >> PAGE_SHIFT;
-       nr_pages = spd.nr_pages;
-       spd.nr_pages = 0;
-       for (page_nr = 0; page_nr < nr_pages; page_nr++) {
-               unsigned int this_len;
-
-               if (!len)
-                       break;
-
-               /*
-                * this_len is the max we'll use from this page
-                */
-               this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
-               page = spd.pages[page_nr];
-
-               if (PageReadahead(page))
-                       page_cache_async_readahead(mapping, &in->f_ra, in,
-                                       page, index, req_pages - page_nr);
-
-               /*
-                * If the page isn't uptodate, we may need to start io on it
-                */
-               if (!PageUptodate(page)) {
-                       lock_page(page);
-
-                       /*
-                        * Page was truncated, or invalidated by the
-                        * filesystem.  Redo the find/create, but this time the
-                        * page is kept locked, so there's no chance of another
-                        * race with truncate/invalidate.
-                        */
-                       if (!page->mapping) {
-                               unlock_page(page);
-retry_lookup:
-                               page = find_or_create_page(mapping, index,
-                                               mapping_gfp_mask(mapping));
-
-                               if (!page) {
-                                       error = -ENOMEM;
-                                       break;
-                               }
-                               put_page(spd.pages[page_nr]);
-                               spd.pages[page_nr] = page;
-                       }
-                       /*
-                        * page was already under io and is now done, great
-                        */
-                       if (PageUptodate(page)) {
-                               unlock_page(page);
-                               goto fill_it;
-                       }
-
-                       /*
-                        * need to read in the page
-                        */
-                       error = mapping->a_ops->readpage(in, page);
-                       if (unlikely(error)) {
-                               /*
-                                * Re-lookup the page
-                                */
-                               if (error == AOP_TRUNCATED_PAGE)
-                                       goto retry_lookup;
-
-                               break;
-                       }
-               }
-fill_it:
-               /*
-                * i_size must be checked after PageUptodate.
-                */
-               isize = i_size_read(mapping->host);
-               end_index = (isize - 1) >> PAGE_SHIFT;
-               if (unlikely(!isize || index > end_index))
-                       break;
-
-               /*
-                * if this is the last page, see if we need to shrink
-                * the length and stop
-                */
-               if (end_index == index) {
-                       unsigned int plen;
-
-                       /*
-                        * max good bytes in this page
-                        */
-                       plen = ((isize - 1) & ~PAGE_MASK) + 1;
-                       if (plen <= loff)
-                               break;
-
-                       /*
-                        * force quit after adding this page
-                        */
-                       this_len = min(this_len, plen - loff);
-                       len = this_len;
-               }
-
-               spd.partial[page_nr].offset = loff;
-               spd.partial[page_nr].len = this_len;
-               len -= this_len;
-               loff = 0;
-               spd.nr_pages++;
-               index++;
-       }
-
-       /*
-        * Release any pages at the end, if we quit early. 'page_nr' is how far
-        * we got, 'nr_pages' is how many pages are in the map.
-        */
-       while (page_nr < nr_pages)
-               put_page(spd.pages[page_nr++]);
-       in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
-
-       if (spd.nr_pages)
-               error = splice_to_pipe(pipe, &spd);
-
-       splice_shrink_spd(&spd);
-       return error;
-}
-
 /**
  * generic_file_splice_read - splice data from file to a pipe
  * @in:                file to splice from
@@ -514,39 +290,53 @@ fill_it:
  *
  * Description:
  *    Will read pages from given file and fill them into a pipe. Can be
- *    used as long as the address_space operations for the source implements
- *    a readpage() hook.
+ *    used as long as @in has a more or less sane ->read_iter().
  *
  */
 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags)
 {
-       loff_t isize, left;
-       int ret;
-
-       if (IS_DAX(in->f_mapping->host))
-               return default_file_splice_read(in, ppos, pipe, len, flags);
+       struct iov_iter to;
+       struct kiocb kiocb;
+       loff_t isize;
+       int idx, ret;
 
        isize = i_size_read(in->f_mapping->host);
        if (unlikely(*ppos >= isize))
                return 0;
 
-       left = isize - *ppos;
-       if (unlikely(left < len))
-               len = left;
-
-       ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
+       iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+       idx = to.idx;
+       init_sync_kiocb(&kiocb, in);
+       kiocb.ki_pos = *ppos;
+       ret = in->f_op->read_iter(&kiocb, &to);
        if (ret > 0) {
-               *ppos += ret;
+               *ppos = kiocb.ki_pos;
                file_accessed(in);
+       } else if (ret < 0) {
+               if (WARN_ON(to.idx != idx || to.iov_offset)) {
+                       /*
+                        * a bogus ->read_iter() has copied something and still
+                        * returned an error instead of a short read.
+                        */
+                       to.idx = idx;
+                       to.iov_offset = 0;
+                       iov_iter_advance(&to, 0); /* to free what was emitted */
+               }
+               /*
+                * callers of ->splice_read() expect -EAGAIN on
+                * "can't put anything in there", rather than -EFAULT.
+                */
+               if (ret == -EFAULT)
+                       ret = -EAGAIN;
        }
 
        return ret;
 }
 EXPORT_SYMBOL(generic_file_splice_read);
 
-static const struct pipe_buf_operations default_pipe_buf_ops = {
+const struct pipe_buf_operations default_pipe_buf_ops = {
        .can_merge = 0,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
@@ -570,7 +360,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
 };
 EXPORT_SYMBOL(nosteal_pipe_buf_ops);
 
-static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
                            unsigned long vlen, loff_t offset)
 {
        mm_segment_t old_fs;
@@ -602,102 +392,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,
 }
 EXPORT_SYMBOL(kernel_write);
 
-ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags)
 {
+       struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
+       struct iov_iter to;
+       struct page **pages;
        unsigned int nr_pages;
-       unsigned int nr_freed;
-       size_t offset;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
+       size_t offset, dummy, copied = 0;
        ssize_t res;
-       size_t this_len;
-       int error;
        int i;
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &default_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
 
-       if (splice_grow_spd(pipe, &spd))
+       if (pipe->nrbufs == pipe->buffers)
+               return -EAGAIN;
+
+       /*
+        * Try to keep page boundaries matching to source pagecache ones -
+        * it probably won't be much help, but...
+        */
+       offset = *ppos & ~PAGE_MASK;
+
+       iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+
+       res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+       if (res <= 0)
                return -ENOMEM;
 
-       res = -ENOMEM;
+       nr_pages = DIV_ROUND_UP(res, PAGE_SIZE); /* res is bytes; keep the partial tail page */
+
        vec = __vec;
-       if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
-               vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
-               if (!vec)
-                       goto shrink_ret;
+       if (nr_pages > PIPE_DEF_BUFFERS) {
+               vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL);
+               if (unlikely(!vec)) {
+                       res = -ENOMEM;
+                       goto out;
+               }
        }
 
-       offset = *ppos & ~PAGE_MASK;
-       nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-       for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
-               struct page *page;
+       pipe->bufs[to.idx].offset = offset;
+       pipe->bufs[to.idx].len -= offset;
 
-               page = alloc_page(GFP_USER);
-               error = -ENOMEM;
-               if (!page)
-                       goto err;
-
-               this_len = min_t(size_t, len, PAGE_SIZE - offset);
-               vec[i].iov_base = (void __user *) page_address(page);
+       for (i = 0; i < nr_pages; i++) {
+               size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
+               vec[i].iov_base = page_address(pages[i]) + offset;
                vec[i].iov_len = this_len;
-               spd.pages[i] = page;
-               spd.nr_pages++;
                len -= this_len;
                offset = 0;
        }
 
-       res = kernel_readv(in, vec, spd.nr_pages, *ppos);
-       if (res < 0) {
-               error = res;
-               goto err;
-       }
-
-       error = 0;
-       if (!res)
-               goto err;
-
-       nr_freed = 0;
-       for (i = 0; i < spd.nr_pages; i++) {
-               this_len = min_t(size_t, vec[i].iov_len, res);
-               spd.partial[i].offset = 0;
-               spd.partial[i].len = this_len;
-               if (!this_len) {
-                       __free_page(spd.pages[i]);
-                       spd.pages[i] = NULL;
-                       nr_freed++;
-               }
-               res -= this_len;
-       }
-       spd.nr_pages -= nr_freed;
-
-       res = splice_to_pipe(pipe, &spd);
-       if (res > 0)
+       res = kernel_readv(in, vec, nr_pages, *ppos);
+       if (res > 0) {
+               copied = res;
                *ppos += res;
+       }
 
-shrink_ret:
        if (vec != __vec)
                kfree(vec);
-       splice_shrink_spd(&spd);
+out:
+       for (i = 0; i < nr_pages; i++)
+               put_page(pages[i]);
+       kvfree(pages);
+       iov_iter_advance(&to, copied);  /* truncates and discards */
        return res;
-
-err:
-       for (i = 0; i < spd.nr_pages; i++)
-               __free_page(spd.pages[i]);
-
-       res = error;
-       goto shrink_ret;
 }
-EXPORT_SYMBOL(default_file_splice_read);
 
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
@@ -757,13 +515,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
 
        while (pipe->nrbufs) {
                struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
-               const struct pipe_buf_operations *ops = buf->ops;
 
                sd->len = buf->len;
                if (sd->len > sd->total_len)
                        sd->len = sd->total_len;
 
-               ret = buf->ops->confirm(pipe, buf);
+               ret = pipe_buf_confirm(pipe, buf);
                if (unlikely(ret)) {
                        if (ret == -ENODATA)
                                ret = 0;
@@ -783,8 +540,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
                sd->total_len -= ret;
 
                if (!buf->len) {
-                       buf->ops = NULL;
-                       ops->release(pipe, buf);
+                       pipe_buf_release(pipe, buf);
                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
                        pipe->nrbufs--;
                        if (pipe->files)
@@ -1003,7 +759,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                        if (idx == pipe->buffers - 1)
                                idx = -1;
 
-                       ret = buf->ops->confirm(pipe, buf);
+                       ret = pipe_buf_confirm(pipe, buf);
                        if (unlikely(ret)) {
                                if (ret == -ENODATA)
                                        ret = 0;
@@ -1030,11 +786,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                while (ret) {
                        struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
                        if (ret >= buf->len) {
-                               const struct pipe_buf_operations *ops = buf->ops;
                                ret -= buf->len;
                                buf->len = 0;
-                               buf->ops = NULL;
-                               ops->release(pipe, buf);
+                               pipe_buf_release(pipe, buf);
                                pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
                                pipe->nrbufs--;
                                if (pipe->files)
@@ -1273,10 +1027,8 @@ out_release:
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
 
-               if (buf->ops) {
-                       buf->ops->release(pipe, buf);
-                       buf->ops = NULL;
-               }
+               if (buf->ops)
+                       pipe_buf_release(pipe, buf);
        }
 
        if (!bytes)
@@ -1342,6 +1094,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 }
 EXPORT_SYMBOL(do_splice_direct);
 
+static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+{
+       while (pipe->nrbufs == pipe->buffers) {
+               if (flags & SPLICE_F_NONBLOCK)
+                       return -EAGAIN;
+               if (signal_pending(current))
+                       return -ERESTARTSYS;
+               pipe->waiting_writers++;
+               pipe_wait(pipe);
+               pipe->waiting_writers--;
+       }
+       return 0;
+}
+
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
                               struct pipe_inode_info *opipe,
                               size_t len, unsigned int flags);
@@ -1424,8 +1190,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
                        offset = in->f_pos;
                }
 
-               ret = do_splice_to(in, &offset, opipe, len, flags);
-
+               pipe_lock(opipe);
+               ret = wait_for_space(opipe, flags);
+               if (!ret)
+                       ret = do_splice_to(in, &offset, opipe, len, flags);
+               pipe_unlock(opipe);
+               if (ret > 0)
+                       wakeup_pipe_readers(opipe);
                if (!off_in)
                        in->f_pos = offset;
                else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1208,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,
        return -EINVAL;
 }
 
-/*
- * Map an iov into an array of pages and offset/length tupples. With the
- * partial_page structure, we can map several non-contiguous ranges into
- * our ones pages[] map instead of splitting that operation into pieces.
- * Could easily be exported as a generic helper for other users, in which
- * case one would probably want to add a 'max_nr_pages' parameter as well.
- */
-static int get_iovec_page_array(const struct iovec __user *iov,
-                               unsigned int nr_vecs, struct page **pages,
-                               struct partial_page *partial, bool aligned,
-                               unsigned int pipe_buffers)
+static int iter_to_pipe(struct iov_iter *from,
+                       struct pipe_inode_info *pipe,
+                       unsigned flags)
 {
-       int buffers = 0, error = 0;
-
-       while (nr_vecs) {
-               unsigned long off, npages;
-               struct iovec entry;
-               void __user *base;
-               size_t len;
-               int i;
-
-               error = -EFAULT;
-               if (copy_from_user(&entry, iov, sizeof(entry)))
-                       break;
-
-               base = entry.iov_base;
-               len = entry.iov_len;
-
-               /*
-                * Sanity check this iovec. 0 read succeeds.
-                */
-               error = 0;
-               if (unlikely(!len))
-                       break;
-               error = -EFAULT;
-               if (!access_ok(VERIFY_READ, base, len))
-                       break;
-
-               /*
-                * Get this base offset and number of pages, then map
-                * in the user pages.
-                */
-               off = (unsigned long) base & ~PAGE_MASK;
-
-               /*
-                * If asked for alignment, the offset must be zero and the
-                * length a multiple of the PAGE_SIZE.
-                */
-               error = -EINVAL;
-               if (aligned && (off || len & ~PAGE_MASK))
-                       break;
-
-               npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               if (npages > pipe_buffers - buffers)
-                       npages = pipe_buffers - buffers;
-
-               error = get_user_pages_fast((unsigned long)base, npages,
-                                       0, &pages[buffers]);
-
-               if (unlikely(error <= 0))
+       struct pipe_buffer buf = {
+               .ops = &user_page_pipe_buf_ops,
+               .flags = flags
+       };
+       size_t total = 0;
+       int ret = 0;
+       bool failed = false;
+
+       while (iov_iter_count(from) && !failed) {
+               struct page *pages[16];
+               ssize_t copied;
+               size_t start;
+               int n;
+
+               copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);
+               if (copied <= 0) {
+                       ret = copied;
                        break;
-
-               /*
-                * Fill this contiguous range into the partial page map.
-                */
-               for (i = 0; i < error; i++) {
-                       const int plen = min_t(size_t, len, PAGE_SIZE - off);
-
-                       partial[buffers].offset = off;
-                       partial[buffers].len = plen;
-
-                       off = 0;
-                       len -= plen;
-                       buffers++;
                }
 
-               /*
-                * We didn't complete this iov, stop here since it probably
-                * means we have to move some of this into a pipe to
-                * be able to continue.
-                */
-               if (len)
-                       break;
-
-               /*
-                * Don't continue if we mapped fewer pages than we asked for,
-                * or if we mapped the max number of pages that we have
-                * room for.
-                */
-               if (error < npages || buffers == pipe_buffers)
-                       break;
-
-               nr_vecs--;
-               iov++;
+               for (n = 0; copied; n++, start = 0) {
+                       int size = min_t(int, copied, PAGE_SIZE - start);
+                       if (!failed) {
+                               buf.page = pages[n];
+                               buf.offset = start;
+                               buf.len = size;
+                               ret = add_to_pipe(pipe, &buf);
+                               if (unlikely(ret < 0)) {
+                                       failed = true;
+                               } else {
+                                       iov_iter_advance(from, ret);
+                                       total += ret;
+                               }
+                       } else {
+                               put_page(pages[n]);
+                       }
+                       copied -= size;
+               }
        }
-
-       if (buffers)
-               return buffers;
-
-       return error;
+       return total ? total : ret;
 }
 
 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,38 +1305,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
  * as splice-from-memory, where the regular splice is splice-from-file (or
  * to file). In both cases the output is a pipe, naturally.
  */
-static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov,
                             unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &user_page_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
+       struct iovec iovstack[UIO_FASTIOV];
+       struct iovec *iov = iovstack;
+       struct iov_iter from;
        long ret;
+       unsigned buf_flag = 0;
+
+       if (flags & SPLICE_F_GIFT)
+               buf_flag = PIPE_BUF_FLAG_GIFT;
 
        pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
 
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
-
-       spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
-                                           spd.partial, false,
-                                           spd.nr_pages_max);
-       if (spd.nr_pages <= 0)
-               ret = spd.nr_pages;
-       else
-               ret = splice_to_pipe(pipe, &spd);
+       ret = import_iovec(WRITE, uiov, nr_segs,
+                          ARRAY_SIZE(iovstack), &iov, &from);
+       if (ret < 0)
+               return ret;
 
-       splice_shrink_spd(&spd);
+       pipe_lock(pipe);
+       ret = wait_for_space(pipe, flags);
+       if (!ret)
+               ret = iter_to_pipe(&from, pipe, buf_flag);
+       pipe_unlock(pipe);
+       if (ret > 0)
+               wakeup_pipe_readers(pipe);
+       kfree(iov);
        return ret;
 }
 
@@ -1876,7 +1589,7 @@ retry:
                         * Get a reference to this pipe buffer,
                         * so we can copy the contents over.
                         */
-                       ibuf->ops->get(ipipe, ibuf);
+                       pipe_buf_get(ipipe, ibuf);
                        *obuf = *ibuf;
 
                        /*
@@ -1948,7 +1661,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
                 * Get a reference to this pipe buffer,
                 * so we can copy the contents over.
                 */
-               ibuf->ops->get(ipipe, ibuf);
+               pipe_buf_get(ipipe, ibuf);
 
                obuf = opipe->bufs + nbuf;
                *obuf = *ibuf;
index c68517b..f46b292 100644 (file)
@@ -393,45 +393,6 @@ xfs_file_read_iter(
        return ret;
 }
 
-STATIC ssize_t
-xfs_file_splice_read(
-       struct file             *infilp,
-       loff_t                  *ppos,
-       struct pipe_inode_info  *pipe,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
-       ssize_t                 ret;
-
-       XFS_STATS_INC(ip->i_mount, xs_read_calls);
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       trace_xfs_file_splice_read(ip, count, *ppos);
-
-       /*
-        * DAX inodes cannot ues the page cache for splice, so we have to push
-        * them through the VFS IO path. This means it goes through
-        * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
-        * cannot lock the splice operation at this level for DAX inodes.
-        */
-       if (IS_DAX(VFS_I(ip))) {
-               ret = default_file_splice_read(infilp, ppos, pipe, count,
-                                              flags);
-               goto out;
-       }
-
-       xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-       ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-out:
-       if (ret > 0)
-               XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
-       return ret;
-}
-
 /*
  * Zero any on disk space between the current EOF and the new, larger EOF.
  *
@@ -1608,7 +1569,7 @@ const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
        .read_iter      = xfs_file_read_iter,
        .write_iter     = xfs_file_write_iter,
-       .splice_read    = xfs_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
index c6b2b1d..16093c7 100644 (file)
@@ -1170,7 +1170,6 @@ DEFINE_RW_EVENT(xfs_file_dax_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_dax_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
        TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
index 901e25d..b04883e 100644 (file)
@@ -2794,8 +2794,6 @@ extern void block_sync_page(struct page *page);
 /* fs/splice.c */
 extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t default_file_splice_read(struct file *, loff_t *,
-               struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
                struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
index 24f5470..e7497c9 100644 (file)
@@ -66,15 +66,10 @@ struct pipe_inode_info {
  *
  * ->confirm()
  *     ->steal()
- *     ...
- *     ->map()
- *     ...
- *     ->unmap()
  *
- * That is, ->map() must be called on a confirmed buffer,
- * same goes for ->steal(). See below for the meaning of each
- * operation. Also see kerneldoc in fs/pipe.c for the pipe
- * and generic variants of these hooks.
+ * That is, ->steal() must be called on a confirmed buffer.
+ * See below for the meaning of each operation. Also see kerneldoc
+ * in fs/pipe.c for the pipe and generic variants of these hooks.
  */
 struct pipe_buf_operations {
        /*
@@ -115,6 +110,53 @@ struct pipe_buf_operations {
        void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
+/**
+ * pipe_buf_get - take a reference on a pipe_buffer
+ * @pipe:      pipe the buffer is attached to
+ * @buf:       buffer whose ->get() method is invoked
+ *
+ * Dispatches to the ->get() method of the buffer's own operations table.
+ */
+static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+                               struct pipe_buffer *buf)
+{
+       const struct pipe_buf_operations *ops = buf->ops;
+
+       ops->get(pipe, buf);
+}
+
+/**
+ * pipe_buf_release - drop a reference to a pipe_buffer
+ * @pipe:      pipe the buffer is attached to
+ * @buf:       buffer being released
+ *
+ * The buffer's ops pointer is reset to NULL first; the previously
+ * installed ->release() method is called afterwards.
+ */
+static inline void pipe_buf_release(struct pipe_inode_info *pipe,
+                                   struct pipe_buffer *buf)
+{
+       const struct pipe_buf_operations *old_ops;
+
+       old_ops = buf->ops;
+       buf->ops = NULL;
+       old_ops->release(pipe, buf);
+}
+
+/**
+ * pipe_buf_confirm - verify contents of the pipe buffer
+ * @pipe:      pipe the buffer is attached to
+ * @buf:       buffer to be confirmed
+ *
+ * Returns whatever the buffer's ->confirm() method returns.
+ */
+static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
+                                  struct pipe_buffer *buf)
+{
+       const struct pipe_buf_operations *ops = buf->ops;
+
+       return ops->confirm(pipe, buf);
+}
+
+/**
+ * pipe_buf_steal - attempt to take ownership of a pipe_buffer
+ * @pipe:      pipe the buffer is attached to
+ * @buf:       buffer the caller would like to steal
+ *
+ * Returns whatever the buffer's ->steal() method returns.
+ */
+static inline int pipe_buf_steal(struct pipe_inode_info *pipe,
+                                struct pipe_buffer *buf)
+{
+       const struct pipe_buf_operations *ops = buf->ops;
+
+       return ops->steal(pipe, buf);
+}
+
 /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
 #define PIPE_SIZE              PAGE_SIZE
@@ -129,7 +171,6 @@ extern unsigned long pipe_user_pages_hard;
 extern unsigned long pipe_user_pages_soft;
 int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 
-
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
 
index 9bf60b5..601258f 100644 (file)
@@ -3064,15 +3064,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len);
 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
                              int len, __wsum csum);
-ssize_t skb_socket_splice(struct sock *sk,
-                         struct pipe_inode_info *pipe,
-                         struct splice_pipe_desc *spd);
 int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int len,
-                   unsigned int flags,
-                   ssize_t (*splice_cb)(struct sock *,
-                                        struct pipe_inode_info *,
-                                        struct splice_pipe_desc *));
+                   unsigned int flags);
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
 int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
index da2751d..00a2116 100644 (file)
@@ -72,6 +72,8 @@ extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
                                  struct splice_desc *, splice_actor *);
 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
                              struct splice_pipe_desc *);
+extern ssize_t add_to_pipe(struct pipe_inode_info *,
+                             struct pipe_buffer *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
                                      splice_direct_actor *);
 
@@ -83,4 +85,5 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
 
 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
+extern const struct pipe_buf_operations default_pipe_buf_ops;
 #endif
index 75b4aaf..b5ebe6d 100644 (file)
@@ -13,6 +13,7 @@
 #include <uapi/linux/uio.h>
 
 struct page;
+struct pipe_inode_info;
 
 struct kvec {
        void *iov_base; /* and that should *never* hold a userland pointer */
@@ -23,6 +24,7 @@ enum {
        ITER_IOVEC = 0,
        ITER_KVEC = 2,
        ITER_BVEC = 4,
+       ITER_PIPE = 8,
 };
 
 struct iov_iter {
@@ -33,8 +35,12 @@ struct iov_iter {
                const struct iovec *iov;
                const struct kvec *kvec;
                const struct bio_vec *bvec;
+               struct pipe_inode_info *pipe;
+       };
+       union {
+               unsigned long nr_segs;
+               int idx;
        };
-       unsigned long nr_segs;
 };
 
 /*
@@ -64,7 +70,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)                         \
-       if (!((start).type & ITER_BVEC))                        \
+       if (!((start).type & (ITER_BVEC | ITER_PIPE)))          \
        for (iter = (start);                                    \
             (iter).count &&                                    \
             ((iov = iov_iter_iovec(&(iter))), 1);              \
@@ -94,6 +100,8 @@ void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
                        unsigned long nr_segs, size_t count);
 void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
                        unsigned long nr_segs, size_t count);
+void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
+                       size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
                        size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
@@ -109,7 +117,7 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 
 static inline bool iter_is_iovec(struct iov_iter *i)
 {
-       return !(i->type & (ITER_BVEC | ITER_KVEC));
+       return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
 }
 
 /*
index fc9b4a4..9988f5c 100644 (file)
@@ -1108,51 +1108,23 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
        return end_pos;
 }
 
-/*
- *     subbuf_read_actor - read up to one subbuf's worth of data
- */
-static int subbuf_read_actor(size_t read_start,
-                            struct rchan_buf *buf,
-                            size_t avail,
-                            read_descriptor_t *desc)
-{
-       void *from;
-       int ret = 0;
-
-       from = buf->start + read_start;
-       ret = avail;
-       if (copy_to_user(desc->arg.buf, from, avail)) {
-               desc->error = -EFAULT;
-               ret = 0;
-       }
-       desc->arg.data += ret;
-       desc->written += ret;
-       desc->count -= ret;
-
-       return ret;
-}
-
-typedef int (*subbuf_actor_t) (size_t read_start,
-                              struct rchan_buf *buf,
-                              size_t avail,
-                              read_descriptor_t *desc);
-
-/*
- *     relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
- */
-static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
-                                       subbuf_actor_t subbuf_actor,
-                                       read_descriptor_t *desc)
+static ssize_t relay_file_read(struct file *filp,
+                              char __user *buffer,
+                              size_t count,
+                              loff_t *ppos)
 {
        struct rchan_buf *buf = filp->private_data;
        size_t read_start, avail;
+       size_t written = 0;
        int ret;
 
-       if (!desc->count)
+       if (!count)
                return 0;
 
        inode_lock(file_inode(filp));
        do {
+               void *from;
+
                if (!relay_file_read_avail(buf, *ppos))
                        break;
 
@@ -1161,32 +1133,22 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
                if (!avail)
                        break;
 
-               avail = min(desc->count, avail);
-               ret = subbuf_actor(read_start, buf, avail, desc);
-               if (desc->error < 0)
+               avail = min(count, avail);
+               from = buf->start + read_start;
+               ret = avail;
+               if (copy_to_user(buffer, from, avail))
                        break;
 
-               if (ret) {
-                       relay_file_read_consume(buf, read_start, ret);
-                       *ppos = relay_file_read_end_pos(buf, read_start, ret);
-               }
-       } while (desc->count && ret);
-       inode_unlock(file_inode(filp));
+               buffer += ret;
+               written += ret;
+               count -= ret;
 
-       return desc->written;
-}
+               relay_file_read_consume(buf, read_start, ret);
+               *ppos = relay_file_read_end_pos(buf, read_start, ret);
+       } while (count);
+       inode_unlock(file_inode(filp));
 
-static ssize_t relay_file_read(struct file *filp,
-                              char __user *buffer,
-                              size_t count,
-                              loff_t *ppos)
-{
-       read_descriptor_t desc;
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.buf = buffer;
-       desc.error = 0;
-       return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
+       return written;
 }
 
 static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
index 7e3138c..48b8c27 100644 (file)
@@ -3,8 +3,11 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/splice.h>
 #include <net/checksum.h>
 
+#define PIPE_PARANOIA /* for now */
+
 #define iterate_iovec(i, n, __v, __p, skip, STEP) {    \
        size_t left;                                    \
        size_t wanted = n;                              \
@@ -290,6 +293,93 @@ done:
        return wanted - bytes;
 }
 
+#ifdef PIPE_PARANOIA
+/*
+ * Consistency check for a pipe-backed iterator.
+ *
+ * With a non-zero iov_offset the pipe must be non-empty and the iterator
+ * must sit in the last occupied slot, exactly at the end of that buffer's
+ * data.  With a zero iov_offset it must point at the slot right after the
+ * last occupied one.
+ *
+ * On a violation the iterator position and every slot of the ring are
+ * dumped with printk(), WARN_ON() fires and false is returned; otherwise
+ * the result is true.  Without PIPE_PARANOIA the check compiles to true.
+ */
+static bool sanity(const struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       int idx = i->idx;
+       int next = pipe->curbuf + pipe->nrbufs;
+       if (i->iov_offset) {
+               struct pipe_buffer *p;
+               if (unlikely(!pipe->nrbufs))
+                       goto Bad;       // pipe must be non-empty
+               if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
+                       goto Bad;       // must be at the last buffer...
+
+               p = &pipe->bufs[idx];
+               if (unlikely(p->offset + p->len != i->iov_offset))
+                       goto Bad;       // ... at the end of segment
+       } else {
+               if (idx != (next & (pipe->buffers - 1)))
+                       goto Bad;       // must be right after the last buffer
+       }
+       return true;
+Bad:
+       /* iov_offset is a size_t, hence %zu rather than %zd */
+       printk(KERN_ERR "idx = %d, offset = %zu\n", i->idx, i->iov_offset);
+       printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
+                       pipe->curbuf, pipe->nrbufs, pipe->buffers);
+       for (idx = 0; idx < pipe->buffers; idx++)
+               printk(KERN_ERR "[%p %p %d %d]\n",
+                       pipe->bufs[idx].ops,
+                       pipe->bufs[idx].page,
+                       pipe->bufs[idx].offset,
+                       pipe->bufs[idx].len);
+       WARN_ON(1);
+       return false;
+}
+#else
+#define sanity(i) true
+#endif
+
+/* Slot following @idx in @pipe's ring: (idx + 1) masked with buffers - 1. */
+static inline int next_idx(int idx, struct pipe_inode_info *pipe)
+{
+       idx++;
+       return idx & (pipe->buffers - 1);
+}
+
+/*
+ * Add @bytes of @page, starting at @offset, to the pipe behind @i.  No
+ * data is copied: the page gets referenced from a pipe buffer, or the
+ * buffer the iterator currently sits in is lengthened when the new range
+ * continues it.  @bytes is clamped to i->count.  Returns the number of
+ * bytes added; 0 when there was nothing to add, the sanity check failed
+ * or no slot was available.
+ */
+static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       struct pipe_buffer *slot;
+       size_t pos;
+       int n;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes) || !sanity(i))
+               return 0;
+
+       pos = i->iov_offset;
+       n = i->idx;
+       slot = &pipe->bufs[n];
+       if (pos) {
+               if (pos == offset && slot->page == page) {
+                       /* same page, contiguous range: grow the buffer */
+                       slot->len += bytes;
+                       i->iov_offset += bytes;
+                       i->count -= bytes;
+                       return bytes;
+               }
+               n = next_idx(n, pipe);
+               slot = &pipe->bufs[n];
+       }
+
+       /* the candidate slot is curbuf of a non-empty pipe: no room */
+       if (pipe->nrbufs && n == pipe->curbuf)
+               return 0;
+
+       pipe->nrbufs++;
+       slot->ops = &page_cache_pipe_buf_ops;
+       slot->page = page;
+       get_page(page);
+       slot->offset = offset;
+       slot->len = bytes;
+       i->iov_offset = offset + bytes;
+       i->idx = n;
+       i->count -= bytes;
+       return bytes;
+}
+
 /*
  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
  * bytes.  For each iovec, fault in each page that constitutes the iovec.
@@ -356,9 +446,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
        kunmap_atomic(addr);
 }
 
+/* True when @buf carries default_pipe_buf_ops as its operations table. */
+static inline bool allocated(struct pipe_buffer *buf)
+{
+       const struct pipe_buf_operations *ops = buf->ops;
+
+       return ops == &default_pipe_buf_ops;
+}
+
+/*
+ * Work out where new data for @i would begin.  The current slot and
+ * offset are reported as they are when the offset is zero, or when the
+ * slot passes allocated() and is not filled up to PAGE_SIZE; otherwise
+ * the following slot at offset 0 is reported.
+ */
+static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
+{
+       int slot = i->idx;
+       size_t pos = i->iov_offset;
+
+       if (pos) {
+               bool appendable = allocated(&i->pipe->bufs[slot]) &&
+                                 pos != PAGE_SIZE;
+
+               if (!appendable) {
+                       slot = next_idx(slot, i->pipe);
+                       pos = 0;
+               }
+       }
+       *idxp = slot;
+       *offp = pos;
+}
+
+/*
+ * Provide room for up to @size bytes (clamped to i->count) in the pipe
+ * behind @i.  A partially filled buffer reported by data_start() is
+ * extended first; after that freshly allocated pages are installed in
+ * the following slots until the request is covered, allocation fails or
+ * the ring wraps onto curbuf of a non-empty pipe.
+ *
+ * *@idxp and *@offp receive the slot and offset at which the room
+ * begins.  Returns the number of bytes of room actually provided.
+ */
+static size_t push_pipe(struct iov_iter *i, size_t size,
+                       int *idxp, size_t *offp)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       struct pipe_buffer *slot;
+       ssize_t todo;
+       size_t pos;
+       int n;
+
+       if (unlikely(size > i->count))
+               size = i->count;
+       if (unlikely(!size))
+               return 0;
+
+       data_start(i, &n, &pos);
+       *idxp = n;
+       *offp = pos;
+       todo = size;
+       if (pos) {
+               /* use up what is left of the partially filled page */
+               todo -= PAGE_SIZE - pos;
+               if (todo <= 0) {
+                       pipe->bufs[n].len += size;
+                       return size;
+               }
+               pipe->bufs[n].len = PAGE_SIZE;
+               n = next_idx(n, pipe);
+       }
+       for (;;) {
+               struct page *page;
+
+               /* reached curbuf while the pipe holds data: ring is full */
+               if (n == pipe->curbuf && pipe->nrbufs)
+                       break;
+               page = alloc_page(GFP_USER);
+               if (!page)
+                       break;
+               slot = &pipe->bufs[n];
+               pipe->nrbufs++;
+               slot->ops = &default_pipe_buf_ops;
+               slot->page = page;
+               slot->offset = 0;
+               if (todo <= PAGE_SIZE) {
+                       slot->len = todo;
+                       return size;
+               }
+               slot->len = PAGE_SIZE;
+               todo -= PAGE_SIZE;
+               n = next_idx(n, pipe);
+       }
+       return size - todo;
+}
+
+/*
+ * Copy @bytes from kernel memory at @addr into the pipe behind @i.
+ * Room is obtained with push_pipe() and then filled page by page.  The
+ * iterator position is moved along with the copy and i->count is reduced
+ * by the amount copied, which is also the return value (0 if the sanity
+ * check fails or no room could be obtained).
+ */
+static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
+                               struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       size_t left, pos;
+       int slot;
+
+       if (!sanity(i))
+               return 0;
+
+       bytes = push_pipe(i, bytes, &slot, &pos);
+       if (unlikely(!bytes))
+               return 0;
+
+       left = bytes;
+       while (left) {
+               size_t chunk = min_t(size_t, left, PAGE_SIZE - pos);
+
+               memcpy_to_page(pipe->bufs[slot].page, pos, addr, chunk);
+               i->idx = slot;
+               i->iov_offset = pos + chunk;
+               left -= chunk;
+               addr += chunk;
+               /* only the first page can start at a non-zero offset */
+               slot = next_idx(slot, pipe);
+               pos = 0;
+       }
+       i->count -= bytes;
+       return bytes;
+}
+
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
        const char *from = addr;
+       if (unlikely(i->type & ITER_PIPE))
+               return copy_pipe_to_iter(addr, bytes, i);
        iterate_and_advance(i, bytes, v,
                __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
                               v.iov_len),
@@ -374,6 +553,10 @@ EXPORT_SYMBOL(copy_to_iter);
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        iterate_and_advance(i, bytes, v,
                __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
                                 v.iov_len),
@@ -389,6 +572,10 @@ EXPORT_SYMBOL(copy_from_iter);
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
@@ -409,14 +596,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
-       } else
+       } else if (likely(!(i->type & ITER_PIPE)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
+       else
+               return copy_page_to_iter_pipe(page, offset, bytes, i);
 }
 EXPORT_SYMBOL(copy_page_to_iter);
 
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
 {
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
@@ -427,8 +620,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 }
 EXPORT_SYMBOL(copy_page_from_iter);
 
+/*
+ * Append @bytes of zeroes to the pipe behind @i.  Room is obtained with
+ * push_pipe() and cleared page by page.  The iterator position follows
+ * the cleared range and i->count is reduced by the amount zeroed, which
+ * is also the return value (0 if the sanity check fails or no room could
+ * be obtained).
+ */
+static size_t pipe_zero(size_t bytes, struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       size_t left, pos;
+       int slot;
+
+       if (!sanity(i))
+               return 0;
+
+       bytes = push_pipe(i, bytes, &slot, &pos);
+       if (unlikely(!bytes))
+               return 0;
+
+       left = bytes;
+       while (left) {
+               size_t chunk = min_t(size_t, left, PAGE_SIZE - pos);
+
+               memzero_page(pipe->bufs[slot].page, pos, chunk);
+               i->idx = slot;
+               i->iov_offset = pos + chunk;
+               left -= chunk;
+               /* only the first page can start at a non-zero offset */
+               slot = next_idx(slot, pipe);
+               pos = 0;
+       }
+       i->count -= bytes;
+       return bytes;
+}
+
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
+       if (unlikely(i->type & ITER_PIPE))
+               return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                __clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -443,6 +662,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
 {
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
+       if (unlikely(i->type & ITER_PIPE)) {
+               kunmap_atomic(kaddr);
+               WARN_ON(1);
+               return 0;
+       }
        iterate_all_kinds(i, bytes, v,
                __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
                                          v.iov_base, v.iov_len),
@@ -455,8 +679,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
+/*
+ * Advance a pipe-backed iterator by @size bytes (clamped to i->count).
+ *
+ * The buffer in which the new position falls is trimmed to end exactly
+ * there, i->idx and i->iov_offset are moved to that point, and every
+ * buffer of the pipe past it is released.  i->count is reduced by the
+ * number of bytes advanced, as the other advancing helpers of this file
+ * do for their own transfers.
+ */
+static void pipe_advance(struct iov_iter *i, size_t size)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       struct pipe_buffer *buf;
+       int idx = i->idx;
+       size_t off = i->iov_offset;
+       size_t left;
+
+       if (unlikely(i->count < size))
+               size = i->count;
+
+       /* walk with a scratch copy; @size is still needed for i->count */
+       left = size;
+       if (left) {
+               if (off) /* make it relative to the beginning of buffer */
+                       left += off - pipe->bufs[idx].offset;
+               while (1) {
+                       buf = &pipe->bufs[idx];
+                       if (left <= buf->len)
+                               break;
+                       left -= buf->len;
+                       idx = next_idx(idx, pipe);
+               }
+               buf->len = left;
+               i->idx = idx;
+               off = i->iov_offset = buf->offset + left;
+       }
+       if (off)
+               idx = next_idx(idx, pipe);
+       if (pipe->nrbufs) {
+               int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+               /*
+                * [curbuf,unused) is in use.  Free [idx,unused)
+                *
+                * NOTE(review): when every slot of the ring is occupied and
+                * idx equals curbuf, idx == unused and nothing is freed
+                * here; confirm against the callers whether that state can
+                * be reached with buffers that ought to be dropped.
+                */
+               while (idx != unused) {
+                       pipe_buf_release(pipe, &pipe->bufs[idx]);
+                       idx = next_idx(idx, pipe);
+                       pipe->nrbufs--;
+               }
+       }
+       /* the bytes stepped over are no longer available to the iterator */
+       i->count -= size;
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
+       if (unlikely(i->type & ITER_PIPE)) {
+               pipe_advance(i, size);
+               return;
+       }
        iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -466,6 +731,8 @@ EXPORT_SYMBOL(iov_iter_advance);
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
+       if (unlikely(i->type & ITER_PIPE))
+               return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
        else if (i->type & ITER_BVEC)
@@ -501,6 +768,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);
 
+/*
+ * Initialise @i as a pipe-backed iterator over @pipe with room for
+ * @count bytes.  The starting slot is (curbuf + nrbufs) masked with
+ * buffers - 1, at offset 0.  @direction must be exactly ITER_PIPE;
+ * anything else trips BUG_ON().
+ */
+void iov_iter_pipe(struct iov_iter *i, int direction,
+                       struct pipe_inode_info *pipe,
+                       size_t count)
+{
+       BUG_ON(direction != ITER_PIPE);
+
+       i->count = count;
+       i->iov_offset = 0;
+       i->pipe = pipe;
+       i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+       i->type = direction;
+}
+EXPORT_SYMBOL(iov_iter_pipe);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
        unsigned long res = 0;
@@ -509,6 +789,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
        if (!size)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE)) {
+               if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
+                       return size | i->iov_offset;
+               return size;
+       }
        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len,
@@ -525,6 +810,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
        if (!size)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return ~0U;
+       }
+
        iterate_all_kinds(i, size, v,
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0), 0),
@@ -537,6 +827,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/*
+ * Obtain room for up to @maxsize bytes in the pipe behind @i via
+ * push_pipe() and store the pages backing that room in @pages, taking a
+ * reference on each of them.  @idx and *@start are overwritten by
+ * push_pipe() with the slot and offset where the room begins.
+ *
+ * Returns the number of bytes of room obtained, or -EFAULT when
+ * push_pipe() could not provide any.  The return type is signed so the
+ * error code is not carried in an unsigned value.
+ */
+static inline ssize_t __pipe_get_pages(struct iov_iter *i,
+                               size_t maxsize,
+                               struct page **pages,
+                               int idx,
+                               size_t *start)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       ssize_t n = push_pipe(i, maxsize, &idx, start);
+       if (!n)
+               return -EFAULT;
+
+       maxsize = n;
+       n += *start;
+       /*
+        * Hand back every page the range touches, including a final one
+        * that is only partially covered; stopping at n < PAGE_SIZE would
+        * leave that page out of @pages and without a reference.
+        */
+       while (n > 0) {
+               get_page(*pages++ = pipe->bufs[idx].page);
+               idx = next_idx(idx, pipe);
+               n -= PAGE_SIZE;
+       }
+
+       return maxsize;
+}
+
+/*
+ * iov_iter_get_pages() backend for pipe-backed iterators.  The request
+ * is limited to what fits in @maxpages pages and in the slots between
+ * the data start and curbuf, then handed to __pipe_get_pages().
+ * Returns -EFAULT if the sanity check fails, otherwise whatever
+ * __pipe_get_pages() returns.
+ */
+static ssize_t pipe_get_pages(struct iov_iter *i,
+                  struct page **pages, size_t maxsize, unsigned maxpages,
+                  size_t *start)
+{
+       unsigned nslots;
+       size_t room;
+       int first;
+
+       if (!sanity(i))
+               return -EFAULT;
+
+       data_start(i, &first, start);
+       /* part of the starting slot plus all the slots after it */
+       nslots = ((i->pipe->curbuf - first - 1) & (i->pipe->buffers - 1)) + 1;
+       if (nslots > maxpages)
+               nslots = maxpages;
+       room = nslots * PAGE_SIZE - *start;
+       if (maxsize > room)
+               maxsize = room;
+
+       return __pipe_get_pages(i, maxsize, pages, first, start);
+}
+
 ssize_t iov_iter_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
@@ -547,6 +878,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
        if (!maxsize)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE))
+               return pipe_get_pages(i, pages, maxsize, maxpages, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -582,6 +915,37 @@ static struct page **get_pages_array(size_t n)
        return p;
 }
 
+/*
+ * iov_iter_get_pages_alloc() backend for pipe-backed iterators.
+ *
+ * Sizes and allocates a page-pointer array for at most @maxsize bytes
+ * starting at the iterator's data start, then lets __pipe_get_pages()
+ * fill it.  On success *@pages is set to the array and the number of
+ * bytes covered is returned.  On failure the array is freed, *@pages is
+ * left untouched and a negative errno is returned: -EFAULT from the
+ * sanity check or __pipe_get_pages(), -ENOMEM if the array could not be
+ * allocated.
+ */
+static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       struct page **p;
+       size_t capacity;
+       ssize_t n;
+       int idx;
+       int npages;
+
+       if (!sanity(i))
+               return -EFAULT;
+
+       data_start(i, &idx, start);
+       /* some of this one + all after this one */
+       npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
+       capacity = npages * PAGE_SIZE - *start;
+       if (maxsize > capacity)
+               maxsize = capacity;
+       else
+               npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
+       p = get_pages_array(npages);
+       if (!p)
+               return -ENOMEM;
+       /*
+        * Keep the result signed: held in an unsigned variable, a negative
+        * errno would pass the "n > 0" test below and the array would be
+        * published instead of freed.
+        */
+       n = __pipe_get_pages(i, maxsize, p, idx, start);
+       if (n > 0)
+               *pages = p;
+       else
+               kvfree(p);
+       return n;
+}
+
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
@@ -594,6 +958,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
        if (!maxsize)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE))
+               return pipe_get_pages_alloc(i, pages, maxsize, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -635,6 +1001,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_from_user(v.iov_base, 
@@ -673,6 +1043,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);     /* for now */
+               return 0;
+       }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@ -712,7 +1086,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
        if (!size)
                return 0;
 
-       iterate_all_kinds(i, size, v, ({
+       if (unlikely(i->type & ITER_PIPE)) {
+               struct pipe_inode_info *pipe = i->pipe;
+               size_t off;
+               int idx;
+
+               if (!sanity(i))
+                       return 0;
+
+               data_start(i, &idx, &off);
+               /* some of this one + all after this one */
+               npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
+               if (npages >= maxpages)
+                       return maxpages;
+       } else iterate_all_kinds(i, size, v, ({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
@@ -737,6 +1124,10 @@ EXPORT_SYMBOL(iov_iter_npages);
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
        *new = *old;
+       if (unlikely(new->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return NULL;
+       }
        if (new->type & ITER_BVEC)
                return new->bvec = kmemdup(new->bvec,
                                    new->nr_segs * sizeof(struct bio_vec),
index 971fc83..d86b5e4 100644 (file)
@@ -2311,119 +2311,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return retval ? retval : error;
 }
 
-static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
-                               struct pipe_inode_info *pipe, size_t len,
-                               unsigned int flags)
-{
-       struct address_space *mapping = in->f_mapping;
-       struct inode *inode = mapping->host;
-       unsigned int loff, nr_pages, req_pages;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct page *page;
-       pgoff_t index, end_index;
-       loff_t isize, left;
-       int error, page_nr;
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &page_cache_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
-
-       isize = i_size_read(inode);
-       if (unlikely(*ppos >= isize))
-               return 0;
-
-       left = isize - *ppos;
-       if (unlikely(left < len))
-               len = left;
-
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
-
-       index = *ppos >> PAGE_SHIFT;
-       loff = *ppos & ~PAGE_MASK;
-       req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       nr_pages = min(req_pages, spd.nr_pages_max);
-
-       spd.nr_pages = find_get_pages_contig(mapping, index,
-                                               nr_pages, spd.pages);
-       index += spd.nr_pages;
-       error = 0;
-
-       while (spd.nr_pages < nr_pages) {
-               error = shmem_getpage(inode, index, &page, SGP_CACHE);
-               if (error)
-                       break;
-               unlock_page(page);
-               spd.pages[spd.nr_pages++] = page;
-               index++;
-       }
-
-       index = *ppos >> PAGE_SHIFT;
-       nr_pages = spd.nr_pages;
-       spd.nr_pages = 0;
-
-       for (page_nr = 0; page_nr < nr_pages; page_nr++) {
-               unsigned int this_len;
-
-               if (!len)
-                       break;
-
-               this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
-               page = spd.pages[page_nr];
-
-               if (!PageUptodate(page) || page->mapping != mapping) {
-                       error = shmem_getpage(inode, index, &page, SGP_CACHE);
-                       if (error)
-                               break;
-                       unlock_page(page);
-                       put_page(spd.pages[page_nr]);
-                       spd.pages[page_nr] = page;
-               }
-
-               isize = i_size_read(inode);
-               end_index = (isize - 1) >> PAGE_SHIFT;
-               if (unlikely(!isize || index > end_index))
-                       break;
-
-               if (end_index == index) {
-                       unsigned int plen;
-
-                       plen = ((isize - 1) & ~PAGE_MASK) + 1;
-                       if (plen <= loff)
-                               break;
-
-                       this_len = min(this_len, plen - loff);
-                       len = this_len;
-               }
-
-               spd.partial[page_nr].offset = loff;
-               spd.partial[page_nr].len = this_len;
-               len -= this_len;
-               loff = 0;
-               spd.nr_pages++;
-               index++;
-       }
-
-       while (page_nr < nr_pages)
-               put_page(spd.pages[page_nr++]);
-
-       if (spd.nr_pages)
-               error = splice_to_pipe(pipe, &spd);
-
-       splice_shrink_spd(&spd);
-
-       if (error > 0) {
-               *ppos += error;
-               file_accessed(in);
-       }
-       return error;
-}
-
 /*
  * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
  */
@@ -3786,7 +3673,7 @@ static const struct file_operations shmem_file_operations = {
        .read_iter      = shmem_file_read_iter,
        .write_iter     = generic_file_write_iter,
        .fsync          = noop_fsync,
-       .splice_read    = shmem_file_splice_read,
+       .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = shmem_fallocate,
 #endif
index cbd19d2..1e3e008 100644 (file)
@@ -1962,37 +1962,13 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
        return false;
 }
 
-ssize_t skb_socket_splice(struct sock *sk,
-                         struct pipe_inode_info *pipe,
-                         struct splice_pipe_desc *spd)
-{
-       int ret;
-
-       /* Drop the socket lock, otherwise we have reverse
-        * locking dependencies between sk_lock and i_mutex
-        * here as compared to sendfile(). We enter here
-        * with the socket lock held, and splice_to_pipe() will
-        * grab the pipe inode lock. For sendfile() emulation,
-        * we call into ->sendpage() with the i_mutex lock held
-        * and networking will grab the socket lock.
-        */
-       release_sock(sk);
-       ret = splice_to_pipe(pipe, spd);
-       lock_sock(sk);
-
-       return ret;
-}
-
 /*
  * Map data from the skb to a pipe. Should handle both the linear part,
  * the fragments, and the frag list.
  */
 int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int tlen,
-                   unsigned int flags,
-                   ssize_t (*splice_cb)(struct sock *,
-                                        struct pipe_inode_info *,
-                                        struct splice_pipe_desc *))
+                   unsigned int flags)
 {
        struct partial_page partial[MAX_SKB_FRAGS];
        struct page *pages[MAX_SKB_FRAGS];
@@ -2009,7 +1985,7 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
        __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
 
        if (spd.nr_pages)
-               ret = splice_cb(sk, pipe, &spd);
+               ret = splice_to_pipe(pipe, &spd);
 
        return ret;
 }
index f253e50..2414b7c 100644 (file)
@@ -691,8 +691,7 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
        int ret;
 
        ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe,
-                             min(rd_desc->count, len), tss->flags,
-                             skb_socket_splice);
+                             min(rd_desc->count, len), tss->flags);
        if (ret > 0)
                rd_desc->count -= ret;
        return ret;
index b7f869a..7e08a4d 100644 (file)
@@ -1160,19 +1160,6 @@ out:
        return copied ? : err;
 }
 
-static ssize_t kcm_sock_splice(struct sock *sk,
-                              struct pipe_inode_info *pipe,
-                              struct splice_pipe_desc *spd)
-{
-       int ret;
-
-       release_sock(sk);
-       ret = splice_to_pipe(pipe, spd);
-       lock_sock(sk);
-
-       return ret;
-}
-
 static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
                               struct pipe_inode_info *pipe, size_t len,
                               unsigned int flags)
@@ -1202,8 +1189,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
        if (len > rxm->full_len)
                len = rxm->full_len;
 
-       copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
-                                kcm_sock_splice);
+       copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags);
        if (copied < 0) {
                err = copied;
                goto err_out;
index 8309687..145082e 100644 (file)
@@ -2475,28 +2475,13 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
        return unix_stream_read_generic(&state);
 }
 
-static ssize_t skb_unix_socket_splice(struct sock *sk,
-                                     struct pipe_inode_info *pipe,
-                                     struct splice_pipe_desc *spd)
-{
-       int ret;
-       struct unix_sock *u = unix_sk(sk);
-
-       mutex_unlock(&u->iolock);
-       ret = splice_to_pipe(pipe, spd);
-       mutex_lock(&u->iolock);
-
-       return ret;
-}
-
 static int unix_stream_splice_actor(struct sk_buff *skb,
                                    int skip, int chunk,
                                    struct unix_stream_read_state *state)
 {
        return skb_splice_bits(skb, state->socket->sk,
                               UNIXCB(skb).consumed + skip,
-                              state->pipe, chunk, state->splice_flags,
-                              skb_unix_socket_splice);
+                              state->pipe, chunk, state->splice_flags);
 }
 
 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,