fix ITER_PIPE interaction with direct_IO
[cascardo/linux.git] / fs / splice.c
index 589a1d5..153d4f3 100644 (file)
@@ -238,8 +238,7 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
                pipe->nrbufs++;
                return buf->len;
        }
-       buf->ops->release(pipe, buf);
-       buf->ops = NULL;
+       pipe_buf_release(pipe, buf);
        return ret;
 }
 EXPORT_SYMBOL(add_to_pipe);
@@ -281,207 +280,6 @@ void splice_shrink_spd(struct splice_pipe_desc *spd)
        kfree(spd->partial);
 }
 
-static int
-__generic_file_splice_read(struct file *in, loff_t *ppos,
-                          struct pipe_inode_info *pipe, size_t len,
-                          unsigned int flags)
-{
-       struct address_space *mapping = in->f_mapping;
-       unsigned int loff, nr_pages, req_pages;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct page *page;
-       pgoff_t index, end_index;
-       loff_t isize;
-       int error, page_nr;
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &page_cache_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
-
-       if (splice_grow_spd(pipe, &spd))
-               return -ENOMEM;
-
-       index = *ppos >> PAGE_SHIFT;
-       loff = *ppos & ~PAGE_MASK;
-       req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       nr_pages = min(req_pages, spd.nr_pages_max);
-
-       /*
-        * Lookup the (hopefully) full range of pages we need.
-        */
-       spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
-       index += spd.nr_pages;
-
-       /*
-        * If find_get_pages_contig() returned fewer pages than we needed,
-        * readahead/allocate the rest and fill in the holes.
-        */
-       if (spd.nr_pages < nr_pages)
-               page_cache_sync_readahead(mapping, &in->f_ra, in,
-                               index, req_pages - spd.nr_pages);
-
-       error = 0;
-       while (spd.nr_pages < nr_pages) {
-               /*
-                * Page could be there, find_get_pages_contig() breaks on
-                * the first hole.
-                */
-               page = find_get_page(mapping, index);
-               if (!page) {
-                       /*
-                        * page didn't exist, allocate one.
-                        */
-                       page = page_cache_alloc_cold(mapping);
-                       if (!page)
-                               break;
-
-                       error = add_to_page_cache_lru(page, mapping, index,
-                                  mapping_gfp_constraint(mapping, GFP_KERNEL));
-                       if (unlikely(error)) {
-                               put_page(page);
-                               if (error == -EEXIST)
-                                       continue;
-                               break;
-                       }
-                       /*
-                        * add_to_page_cache() locks the page, unlock it
-                        * to avoid convoluting the logic below even more.
-                        */
-                       unlock_page(page);
-               }
-
-               spd.pages[spd.nr_pages++] = page;
-               index++;
-       }
-
-       /*
-        * Now loop over the map and see if we need to start IO on any
-        * pages, fill in the partial map, etc.
-        */
-       index = *ppos >> PAGE_SHIFT;
-       nr_pages = spd.nr_pages;
-       spd.nr_pages = 0;
-       for (page_nr = 0; page_nr < nr_pages; page_nr++) {
-               unsigned int this_len;
-
-               if (!len)
-                       break;
-
-               /*
-                * this_len is the max we'll use from this page
-                */
-               this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
-               page = spd.pages[page_nr];
-
-               if (PageReadahead(page))
-                       page_cache_async_readahead(mapping, &in->f_ra, in,
-                                       page, index, req_pages - page_nr);
-
-               /*
-                * If the page isn't uptodate, we may need to start io on it
-                */
-               if (!PageUptodate(page)) {
-                       lock_page(page);
-
-                       /*
-                        * Page was truncated, or invalidated by the
-                        * filesystem.  Redo the find/create, but this time the
-                        * page is kept locked, so there's no chance of another
-                        * race with truncate/invalidate.
-                        */
-                       if (!page->mapping) {
-                               unlock_page(page);
-retry_lookup:
-                               page = find_or_create_page(mapping, index,
-                                               mapping_gfp_mask(mapping));
-
-                               if (!page) {
-                                       error = -ENOMEM;
-                                       break;
-                               }
-                               put_page(spd.pages[page_nr]);
-                               spd.pages[page_nr] = page;
-                       }
-                       /*
-                        * page was already under io and is now done, great
-                        */
-                       if (PageUptodate(page)) {
-                               unlock_page(page);
-                               goto fill_it;
-                       }
-
-                       /*
-                        * need to read in the page
-                        */
-                       error = mapping->a_ops->readpage(in, page);
-                       if (unlikely(error)) {
-                               /*
-                                * Re-lookup the page
-                                */
-                               if (error == AOP_TRUNCATED_PAGE)
-                                       goto retry_lookup;
-
-                               break;
-                       }
-               }
-fill_it:
-               /*
-                * i_size must be checked after PageUptodate.
-                */
-               isize = i_size_read(mapping->host);
-               end_index = (isize - 1) >> PAGE_SHIFT;
-               if (unlikely(!isize || index > end_index))
-                       break;
-
-               /*
-                * if this is the last page, see if we need to shrink
-                * the length and stop
-                */
-               if (end_index == index) {
-                       unsigned int plen;
-
-                       /*
-                        * max good bytes in this page
-                        */
-                       plen = ((isize - 1) & ~PAGE_MASK) + 1;
-                       if (plen <= loff)
-                               break;
-
-                       /*
-                        * force quit after adding this page
-                        */
-                       this_len = min(this_len, plen - loff);
-                       len = this_len;
-               }
-
-               spd.partial[page_nr].offset = loff;
-               spd.partial[page_nr].len = this_len;
-               len -= this_len;
-               loff = 0;
-               spd.nr_pages++;
-               index++;
-       }
-
-       /*
-        * Release any pages at the end, if we quit early. 'page_nr' is how far
-        * we got, 'nr_pages' is how many pages are in the map.
-        */
-       while (page_nr < nr_pages)
-               put_page(spd.pages[page_nr++]);
-       in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
-
-       if (spd.nr_pages)
-               error = splice_to_pipe(pipe, &spd);
-
-       splice_shrink_spd(&spd);
-       return error;
-}
-
 /**
  * generic_file_splice_read - splice data from file to a pipe
  * @in:                file to splice from
@@ -492,32 +290,40 @@ fill_it:
  *
  * Description:
  *    Will read pages from given file and fill them into a pipe. Can be
- *    used as long as the address_space operations for the source implements
- *    a readpage() hook.
+ *    used as long as the file has a more or less sane ->read_iter().
  *
  */
 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags)
 {
-       loff_t isize, left;
-       int ret;
-
-       if (IS_DAX(in->f_mapping->host))
-               return default_file_splice_read(in, ppos, pipe, len, flags);
+       struct iov_iter to;
+       struct kiocb kiocb;
+       loff_t isize;
+       int idx, ret;
 
        isize = i_size_read(in->f_mapping->host);
        if (unlikely(*ppos >= isize))
                return 0;
 
-       left = isize - *ppos;
-       if (unlikely(left < len))
-               len = left;
-
-       ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
+       iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+       idx = to.idx;
+       init_sync_kiocb(&kiocb, in);
+       kiocb.ki_pos = *ppos;
+       ret = in->f_op->read_iter(&kiocb, &to);
        if (ret > 0) {
-               *ppos += ret;
+               *ppos = kiocb.ki_pos;
                file_accessed(in);
+       } else if (ret < 0) {
+               to.idx = idx;
+               to.iov_offset = 0;
+               iov_iter_advance(&to, 0); /* to free what was emitted */
+               /*
+                * callers of ->splice_read() expect -EAGAIN on
+                * "can't put anything in there", rather than -EFAULT.
+                */
+               if (ret == -EFAULT)
+                       ret = -EAGAIN;
        }
 
        return ret;
@@ -548,7 +354,7 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
 };
 EXPORT_SYMBOL(nosteal_pipe_buf_ops);
 
-static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
+static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
                            unsigned long vlen, loff_t offset)
 {
        mm_segment_t old_fs;
@@ -580,102 +386,70 @@ ssize_t kernel_write(struct file *file, const char *buf, size_t count,
 }
 EXPORT_SYMBOL(kernel_write);
 
-ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags)
 {
+       struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
+       struct iov_iter to;
+       struct page **pages;
        unsigned int nr_pages;
-       unsigned int nr_freed;
-       size_t offset;
-       struct page *pages[PIPE_DEF_BUFFERS];
-       struct partial_page partial[PIPE_DEF_BUFFERS];
-       struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
+       size_t offset, dummy, copied = 0;
        ssize_t res;
-       size_t this_len;
-       int error;
        int i;
-       struct splice_pipe_desc spd = {
-               .pages = pages,
-               .partial = partial,
-               .nr_pages_max = PIPE_DEF_BUFFERS,
-               .flags = flags,
-               .ops = &default_pipe_buf_ops,
-               .spd_release = spd_release_page,
-       };
 
-       if (splice_grow_spd(pipe, &spd))
+       if (pipe->nrbufs == pipe->buffers)
+               return -EAGAIN;
+
+       /*
+        * Start at the source's in-page offset so that pipe page boundaries
+        * match the source pagecache ones - probably not much help, but...
+        */
+       offset = *ppos & ~PAGE_MASK;
+
+       iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+
+       res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &dummy);
+       if (res <= 0)
                return -ENOMEM;
 
-       res = -ENOMEM;
+       /* round up: the last page may be only partly covered by res */
+       nr_pages = DIV_ROUND_UP(res + dummy, PAGE_SIZE);
        vec = __vec;
-       if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
-               vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
-               if (!vec)
-                       goto shrink_ret;
+       if (nr_pages > PIPE_DEF_BUFFERS) {
+               vec = kmalloc(nr_pages * sizeof(struct kvec), GFP_KERNEL);
+               if (unlikely(!vec)) {
+                       res = -ENOMEM;
+                       goto out;
+               }
        }
 
-       offset = *ppos & ~PAGE_MASK;
-       nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-       for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
-               struct page *page;
-
-               page = alloc_page(GFP_USER);
-               error = -ENOMEM;
-               if (!page)
-                       goto err;
+       pipe->bufs[to.idx].offset = offset;
+       pipe->bufs[to.idx].len -= offset;
 
-               this_len = min_t(size_t, len, PAGE_SIZE - offset);
-               vec[i].iov_base = (void __user *) page_address(page);
+       for (i = 0; i < nr_pages; i++) {
+               size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
+               vec[i].iov_base = page_address(pages[i]) + offset;
                vec[i].iov_len = this_len;
-               spd.pages[i] = page;
-               spd.nr_pages++;
                len -= this_len;
                offset = 0;
        }
 
-       res = kernel_readv(in, vec, spd.nr_pages, *ppos);
-       if (res < 0) {
-               error = res;
-               goto err;
-       }
-
-       error = 0;
-       if (!res)
-               goto err;
-
-       nr_freed = 0;
-       for (i = 0; i < spd.nr_pages; i++) {
-               this_len = min_t(size_t, vec[i].iov_len, res);
-               spd.partial[i].offset = 0;
-               spd.partial[i].len = this_len;
-               if (!this_len) {
-                       __free_page(spd.pages[i]);
-                       spd.pages[i] = NULL;
-                       nr_freed++;
-               }
-               res -= this_len;
-       }
-       spd.nr_pages -= nr_freed;
-
-       res = splice_to_pipe(pipe, &spd);
-       if (res > 0)
+       res = kernel_readv(in, vec, nr_pages, *ppos);
+       if (res > 0) {
+               copied = res;
                *ppos += res;
+       }
 
-shrink_ret:
        if (vec != __vec)
                kfree(vec);
-       splice_shrink_spd(&spd);
+out:
+       for (i = 0; i < nr_pages; i++)
+               put_page(pages[i]);
+       kvfree(pages);
+       iov_iter_advance(&to, copied);  /* truncates and discards */
        return res;
-
-err:
-       for (i = 0; i < spd.nr_pages; i++)
-               __free_page(spd.pages[i]);
-
-       res = error;
-       goto shrink_ret;
 }
-EXPORT_SYMBOL(default_file_splice_read);
 
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
@@ -735,13 +509,12 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
 
        while (pipe->nrbufs) {
                struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
-               const struct pipe_buf_operations *ops = buf->ops;
 
                sd->len = buf->len;
                if (sd->len > sd->total_len)
                        sd->len = sd->total_len;
 
-               ret = buf->ops->confirm(pipe, buf);
+               ret = pipe_buf_confirm(pipe, buf);
                if (unlikely(ret)) {
                        if (ret == -ENODATA)
                                ret = 0;
@@ -761,8 +534,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
                sd->total_len -= ret;
 
                if (!buf->len) {
-                       buf->ops = NULL;
-                       ops->release(pipe, buf);
+                       pipe_buf_release(pipe, buf);
                        pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
                        pipe->nrbufs--;
                        if (pipe->files)
@@ -981,7 +753,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                        if (idx == pipe->buffers - 1)
                                idx = -1;
 
-                       ret = buf->ops->confirm(pipe, buf);
+                       ret = pipe_buf_confirm(pipe, buf);
                        if (unlikely(ret)) {
                                if (ret == -ENODATA)
                                        ret = 0;
@@ -1008,11 +780,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                while (ret) {
                        struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
                        if (ret >= buf->len) {
-                               const struct pipe_buf_operations *ops = buf->ops;
                                ret -= buf->len;
                                buf->len = 0;
-                               buf->ops = NULL;
-                               ops->release(pipe, buf);
+                               pipe_buf_release(pipe, buf);
                                pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
                                pipe->nrbufs--;
                                if (pipe->files)
@@ -1251,10 +1021,8 @@ out_release:
        for (i = 0; i < pipe->buffers; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;
 
-               if (buf->ops) {
-                       buf->ops->release(pipe, buf);
-                       buf->ops = NULL;
-               }
+               if (buf->ops)
+                       pipe_buf_release(pipe, buf);
        }
 
        if (!bytes)
@@ -1815,7 +1583,7 @@ retry:
                         * Get a reference to this pipe buffer,
                         * so we can copy the contents over.
                         */
-                       ibuf->ops->get(ipipe, ibuf);
+                       pipe_buf_get(ipipe, ibuf);
                        *obuf = *ibuf;
 
                        /*
@@ -1887,7 +1655,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
                 * Get a reference to this pipe buffer,
                 * so we can copy the contents over.
                 */
-               ibuf->ops->get(ipipe, ibuf);
+               pipe_buf_get(ipipe, ibuf);
 
                obuf = opipe->bufs + nbuf;
                *obuf = *ibuf;