new iov_iter flavour: pipe-backed
[cascardo/linux.git] / fs / splice.c
index dd9bf7e..589a1d5 100644 (file)
@@ -183,82 +183,39 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                       struct splice_pipe_desc *spd)
 {
        unsigned int spd_pages = spd->nr_pages;
-       int ret, do_wakeup, page_nr;
+       int ret = 0, page_nr = 0;
 
        if (!spd_pages)
                return 0;
 
-       ret = 0;
-       do_wakeup = 0;
-       page_nr = 0;
-
-       pipe_lock(pipe);
-
-       for (;;) {
-               if (!pipe->readers) {
-                       send_sig(SIGPIPE, current, 0);
-                       if (!ret)
-                               ret = -EPIPE;
-                       break;
-               }
-
-               if (pipe->nrbufs < pipe->buffers) {
-                       int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
-                       struct pipe_buffer *buf = pipe->bufs + newbuf;
-
-                       buf->page = spd->pages[page_nr];
-                       buf->offset = spd->partial[page_nr].offset;
-                       buf->len = spd->partial[page_nr].len;
-                       buf->private = spd->partial[page_nr].private;
-                       buf->ops = spd->ops;
-                       if (spd->flags & SPLICE_F_GIFT)
-                               buf->flags |= PIPE_BUF_FLAG_GIFT;
-
-                       pipe->nrbufs++;
-                       page_nr++;
-                       ret += buf->len;
-
-                       if (pipe->files)
-                               do_wakeup = 1;
+       if (unlikely(!pipe->readers)) {
+               send_sig(SIGPIPE, current, 0);
+               ret = -EPIPE;
+               goto out;
+       }
 
-                       if (!--spd->nr_pages)
-                               break;
-                       if (pipe->nrbufs < pipe->buffers)
-                               continue;
+       while (pipe->nrbufs < pipe->buffers) {
+               int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+               struct pipe_buffer *buf = pipe->bufs + newbuf;
 
-                       break;
-               }
+               buf->page = spd->pages[page_nr];
+               buf->offset = spd->partial[page_nr].offset;
+               buf->len = spd->partial[page_nr].len;
+               buf->private = spd->partial[page_nr].private;
+               buf->ops = spd->ops;
 
-               if (spd->flags & SPLICE_F_NONBLOCK) {
-                       if (!ret)
-                               ret = -EAGAIN;
-                       break;
-               }
+               pipe->nrbufs++;
+               page_nr++;
+               ret += buf->len;
 
-               if (signal_pending(current)) {
-                       if (!ret)
-                               ret = -ERESTARTSYS;
+               if (!--spd->nr_pages)
                        break;
-               }
-
-               if (do_wakeup) {
-                       smp_mb();
-                       if (waitqueue_active(&pipe->wait))
-                               wake_up_interruptible_sync(&pipe->wait);
-                       kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
-                       do_wakeup = 0;
-               }
-
-               pipe->waiting_writers++;
-               pipe_wait(pipe);
-               pipe->waiting_writers--;
        }
 
-       pipe_unlock(pipe);
-
-       if (do_wakeup)
-               wakeup_pipe_readers(pipe);
+       if (!ret)
+               ret = -EAGAIN;
 
+out:
        while (page_nr < spd_pages)
                spd->spd_release(spd, page_nr++);
 
@@ -266,6 +223,45 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL_GPL(splice_to_pipe);
 
+/*
+ * add_to_pipe - queue one caller-prepared buffer behind the last used slot
+ * @pipe: destination pipe
+ * @buf:  descriptor that is copied by value into the ring
+ *
+ * Returns buf->len once *buf has been copied into the ring.  Fails with
+ * -EPIPE (after sending SIGPIPE to current) when the pipe has no readers
+ * and with -EAGAIN when every slot is in use; on both failures the
+ * buffer is handed to buf->ops->release() and buf->ops is cleared.
+ */
+ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+{
+       int slot;
+       int err;
+
+       if (unlikely(!pipe->readers)) {
+               send_sig(SIGPIPE, current, 0);
+               err = -EPIPE;
+               goto drop;
+       }
+
+       if (pipe->nrbufs == pipe->buffers) {
+               err = -EAGAIN;
+               goto drop;
+       }
+
+       /* The free slot sits nrbufs entries past curbuf, wrapped by the mask. */
+       slot = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+       pipe->bufs[slot] = *buf;
+       pipe->nrbufs++;
+       return buf->len;
+
+drop:
+       buf->ops->release(pipe, buf);
+       buf->ops = NULL;
+       return err;
+}
+EXPORT_SYMBOL(add_to_pipe);
+
 void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 {
        put_page(spd->pages[i]);
@@ -546,7 +524,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 }
 EXPORT_SYMBOL(generic_file_splice_read);
 
-static const struct pipe_buf_operations default_pipe_buf_ops = {
+const struct pipe_buf_operations default_pipe_buf_ops = {
        .can_merge = 0,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
@@ -1342,6 +1320,39 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 }
 EXPORT_SYMBOL(do_splice_direct);
 
+/*
+ * wait_for_space - wait until @pipe has at least one free slot
+ * @pipe:  pipe to wait on; both visible callers hold pipe_lock() around it
+ * @flags: splice flags; SPLICE_F_NONBLOCK turns the wait into -EAGAIN
+ *
+ * Returns 0 once pipe->nrbufs != pipe->buffers, -EAGAIN for a full pipe
+ * with SPLICE_F_NONBLOCK, -ERESTARTSYS when a signal is pending, and
+ * -EPIPE (after SIGPIPE) when the pipe is full and has no readers.
+ *
+ * The reader check has to be repeated on every iteration: the test that
+ * splice_to_pipe() used to make before each sleep went away together
+ * with its wait loop, so without it a writer sleeping on a full pipe
+ * whose last reader disappears would keep seeing nrbufs == buffers and
+ * go back to sleep instead of failing with -EPIPE.
+ */
+static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
+{
+       while (pipe->nrbufs == pipe->buffers) {
+               if (unlikely(!pipe->readers)) {
+                       send_sig(SIGPIPE, current, 0);
+                       return -EPIPE;
+               }
+               if (flags & SPLICE_F_NONBLOCK)
+                       return -EAGAIN;
+               if (signal_pending(current))
+                       return -ERESTARTSYS;
+               pipe->waiting_writers++;
+               pipe_wait(pipe);
+               pipe->waiting_writers--;
+       }
+       return 0;
+}
+
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
                                struct pipe_inode_info *opipe,
                                size_t len, unsigned int flags);
@@ -1424,8 +1416,13 @@ static long do_splice(struct file *in, loff_t __user *off_in,
                        offset = in->f_pos;
                }
 
-               ret = do_splice_to(in, &offset, opipe, len, flags);
-
+               pipe_lock(opipe);
+               ret = wait_for_space(opipe, flags);
+               if (!ret)
+                       ret = do_splice_to(in, &offset, opipe, len, flags);
+               pipe_unlock(opipe);
+               if (ret > 0)
+                       wakeup_pipe_readers(opipe);
                if (!off_in)
                        in->f_pos = offset;
                else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
@@ -1437,106 +1434,50 @@ static long do_splice(struct file *in, loff_t __user *off_in,
        return -EINVAL;
 }
 
-/*
- * Map an iov into an array of pages and offset/length tupples. With the
- * partial_page structure, we can map several non-contiguous ranges into
- * our ones pages[] map instead of splitting that operation into pieces.
- * Could easily be exported as a generic helper for other users, in which
- * case one would probably want to add a 'max_nr_pages' parameter as well.
- */
-static int get_iovec_page_array(const struct iovec __user *iov,
-                               unsigned int nr_vecs, struct page **pages,
-                               struct partial_page *partial, bool aligned,
-                               unsigned int pipe_buffers)
+static int iter_to_pipe(struct iov_iter *from, /* source; advanced past every chunk that gets queued */
+                       struct pipe_inode_info *pipe, /* pipe that receives the pages via add_to_pipe() */
+                       unsigned flags) /* stored as .flags of each queued pipe_buffer */
 {
-       int buffers = 0, error = 0;
-
-       while (nr_vecs) {
-               unsigned long off, npages;
-               struct iovec entry;
-               void __user *base;
-               size_t len;
-               int i;
-
-               error = -EFAULT;
-               if (copy_from_user(&entry, iov, sizeof(entry)))
-                       break;
-
-               base = entry.iov_base;
-               len = entry.iov_len;
-
-               /*
-                * Sanity check this iovec. 0 read succeeds.
-                */
-               error = 0;
-               if (unlikely(!len))
-                       break;
-               error = -EFAULT;
-               if (!access_ok(VERIFY_READ, base, len))
-                       break;
-
-               /*
-                * Get this base offset and number of pages, then map
-                * in the user pages.
-                */
-               off = (unsigned long) base & ~PAGE_MASK;
-
-               /*
-                * If asked for alignment, the offset must be zero and the
-                * length a multiple of the PAGE_SIZE.
-                */
-               error = -EINVAL;
-               if (aligned && (off || len & ~PAGE_MASK))
-                       break;
-
-               npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               if (npages > pipe_buffers - buffers)
-                       npages = pipe_buffers - buffers;
-
-               error = get_user_pages_fast((unsigned long)base, npages,
-                                       0, &pages[buffers]);
-
-               if (unlikely(error <= 0))
+       struct pipe_buffer buf = { /* template reused for every page handed to add_to_pipe() */
+               .ops = &user_page_pipe_buf_ops,
+               .flags = flags
+       };
+       size_t total = 0; /* bytes queued on the pipe so far */
+       int ret = 0; /* most recent iov_iter_get_pages()/add_to_pipe() result */
+       bool failed = false; /* set once add_to_pipe() fails; later pages are only released */
+
+       while (iov_iter_count(from) && !failed) {
+               struct page *pages[16]; /* one batch: at most 16 pages per iov_iter_get_pages() call */
+               ssize_t copied; /* bytes covered by the current batch, counted down below */
+               size_t start; /* used as the offset into pages[0]; later pages use 0 */
+               int n;
+
+               copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start);
+               if (copied <= 0) { /* nothing obtained: stop, ret is reported only if total is 0 */
+                       ret = copied;
                        break;
-
-               /*
-                * Fill this contiguous range into the partial page map.
-                */
-               for (i = 0; i < error; i++) {
-                       const int plen = min_t(size_t, len, PAGE_SIZE - off);
-
-                       partial[buffers].offset = off;
-                       partial[buffers].len = plen;
-
-                       off = 0;
-                       len -= plen;
-                       buffers++;
                }
 
-               /*
-                * We didn't complete this iov, stop here since it probably
-                * means we have to move some of this into a pipe to
-                * be able to continue.
-                */
-               if (len)
-                       break;
-
-               /*
-                * Don't continue if we mapped fewer pages than we asked for,
-                * or if we mapped the max number of pages that we have
-                * room for.
-                */
-               if (error < npages || buffers == pipe_buffers)
-                       break;
-
-               nr_vecs--;
-               iov++;
+               for (n = 0; copied; n++, start = 0) { /* walk the batch page by page until its bytes are used up */
+                       int size = min_t(int, copied, PAGE_SIZE - start); /* part of the batch that lies in pages[n] */
+                       if (!failed) {
+                               buf.page = pages[n];
+                               buf.offset = start;
+                               buf.len = size;
+                               ret = add_to_pipe(pipe, &buf); /* on failure add_to_pipe() has already called buf.ops->release() */
+                               if (unlikely(ret < 0)) {
+                                       failed = true;
+                               } else {
+                                       iov_iter_advance(from, ret); /* consume only what was actually queued */
+                                       total += ret;
+                               }
+                       } else {
+                               put_page(pages[n]); /* page was fetched in this batch but never queued */
+                       }
+                       copied -= size;
+               }
        }
-
-       if (buffers)
-               return buffers;
-
-       return error;
+       return total ? total : ret; /* bytes queued take precedence over a later error */
 }
 
 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -1590,38 +1531,36 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
  * as splice-from-memory, where the regular splice is splice-from-file (or
  * to file). In both cases the output is a pipe, naturally.
  */
-static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *uiov, /* uiov: user iovec array, imported below */
                             unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &user_page_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
+       struct iovec iovstack[UIO_FASTIOV]; /* on-stack iovec storage offered to import_iovec() */
+       struct iovec *iov = iovstack; /* passed by address to import_iovec(); kfree()d before returning */
+       struct iov_iter from; /* iterator over the imported segments, consumed by iter_to_pipe() */
        long ret;
+       unsigned buf_flag = 0; /* per-buffer flags handed to iter_to_pipe() */
+
+       if (flags & SPLICE_F_GIFT) /* translate the splice-level gift flag into the pipe_buffer flag */
+               buf_flag = PIPE_BUF_FLAG_GIFT;
 
        pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
 
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
-
-       spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
-                                           spd.partial, false,
-                                           spd.nr_pages_max);
-       if (spd.nr_pages <= 0)
-               ret = spd.nr_pages;
-       else
-               ret = splice_to_pipe(pipe, &spd);
+       ret = import_iovec(WRITE, uiov, nr_segs,
+                          ARRAY_SIZE(iovstack), &iov, &from);
+       if (ret < 0)
+               return ret; /* NOTE(review): returns without kfree(iov); presumably import_iovec() cleans up on failure - confirm */
 
-       splice_shrink_spd(&spd);
+       pipe_lock(pipe); /* the pipe stays locked across both the wait and the queuing */
+       ret = wait_for_space(pipe, flags); /* non-zero result means give up without queuing anything */
+       if (!ret)
+               ret = iter_to_pipe(&from, pipe, buf_flag);
+       pipe_unlock(pipe);
+       if (ret > 0) /* wake readers only when something was actually queued */
+               wakeup_pipe_readers(pipe);
+       kfree(iov);
        return ret;
 }