Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[cascardo/linux.git] / lib / iov_iter.c
index ce46320..0ce3411 100644 (file)
@@ -3,8 +3,11 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/splice.h>
 #include <net/checksum.h>
 
+#define PIPE_PARANOIA /* for now */
+
 #define iterate_iovec(i, n, __v, __p, skip, STEP) {    \
        size_t left;                                    \
        size_t wanted = n;                              \
@@ -290,6 +293,93 @@ done:
        return wanted - bytes;
 }
 
+#ifdef PIPE_PARANOIA
+/*
+ * Debug check that the position recorded in a pipe-backed iterator agrees
+ * with the current state of its pipe.
+ *
+ * Returns true when the position is consistent.  Otherwise dumps the
+ * iterator position, the pipe's ring indices and every buffer slot via
+ * printk(), fires WARN_ON(1) and returns false.
+ *
+ * Indices are wrapped with "& (pipe->buffers - 1)", which presumably relies
+ * on pipe->buffers being a power of two -- confirm against the pipe code.
+ */
+static bool sanity(const struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       int idx = i->idx;
+       /* unwrapped index of the slot just past the last in-use buffer */
+       int next = pipe->curbuf + pipe->nrbufs;
+       if (i->iov_offset) {
+               /* non-zero offset: the iterator sits inside a buffer */
+               struct pipe_buffer *p;
+               if (unlikely(!pipe->nrbufs))
+                       goto Bad;       // pipe must be non-empty
+               if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
+                       goto Bad;       // must be at the last buffer...
+
+               p = &pipe->bufs[idx];
+               if (unlikely(p->offset + p->len != i->iov_offset))
+                       goto Bad;       // ... at the end of segment
+       } else {
+               /* zero offset: the iterator points at a slot not yet in use */
+               if (idx != (next & (pipe->buffers - 1)))
+                       goto Bad;       // must be right after the last buffer
+       }
+       return true;
+Bad:
+       /* dump enough state to reconstruct what went wrong */
+       printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
+       printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
+                       pipe->curbuf, pipe->nrbufs, pipe->buffers);
+       for (idx = 0; idx < pipe->buffers; idx++)
+               printk(KERN_ERR "[%p %p %d %d]\n",
+                       pipe->bufs[idx].ops,
+                       pipe->bufs[idx].page,
+                       pipe->bufs[idx].offset,
+                       pipe->bufs[idx].len);
+       WARN_ON(1);
+       return false;
+}
+#else
+/* without PIPE_PARANOIA the check compiles away to a constant */
+#define sanity(i) true
+#endif
+
+/*
+ * Index of the slot that follows @idx in @pipe's buffer ring; the result
+ * is wrapped with the (pipe->buffers - 1) mask.
+ */
+static inline int next_idx(int idx, struct pipe_inode_info *pipe)
+{
+       int following = idx + 1;
+
+       return following & (pipe->buffers - 1);
+}
+
+/*
+ * Attach @bytes of @page, starting at @offset, to the pipe behind @i.
+ *
+ * No data is copied: the page itself is placed in a pipe buffer slot (with
+ * a reference taken via get_page()), or, when it directly continues the
+ * buffer the iterator currently ends in, that buffer is simply lengthened.
+ *
+ * @bytes is clamped to i->count.  Returns the number of bytes accounted
+ * for, or 0 when there is nothing to do, the sanity check fails, or the
+ * next slot is still occupied.
+ */
+static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       struct pipe_buffer *buf;
+       size_t off;
+       int idx;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       if (!sanity(i))
+               return 0;
+
+       off = i->iov_offset;
+       idx = i->idx;
+       buf = &pipe->bufs[idx];
+       if (off) {
+               /* iterator ends inside buf: same page, contiguous offset? */
+               if (offset == off && buf->page == page) {
+                       /* merge with the last one */
+                       buf->len += bytes;
+                       i->iov_offset += bytes;
+                       goto out;
+               }
+               /* cannot merge; the new data goes into the following slot */
+               idx = next_idx(idx, pipe);
+               buf = &pipe->bufs[idx];
+       }
+       /* wrapped around onto the oldest in-use buffer: no free slot left */
+       if (idx == pipe->curbuf && pipe->nrbufs)
+               return 0;
+       pipe->nrbufs++;
+       buf->ops = &page_cache_pipe_buf_ops;
+       /* the slot now holds its own reference to the page */
+       get_page(buf->page = page);
+       buf->offset = offset;
+       buf->len = bytes;
+       /* iterator now ends right after the data just attached */
+       i->iov_offset = offset + bytes;
+       i->idx = idx;
+out:
+       i->count -= bytes;
+       return bytes;
+}
+
 /*
  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
  * bytes.  For each iovec, fault in each page that constitutes the iovec.
@@ -355,9 +445,98 @@ static void memzero_page(struct page *page, size_t offset, size_t len)
        kunmap_atomic(addr);
 }
 
+/*
+ * Tell whether @buf carries default_pipe_buf_ops, the ops that push_pipe()
+ * installs on the pages it allocates itself.
+ */
+static inline bool allocated(struct pipe_buffer *buf)
+{
+       if (buf->ops != &default_pipe_buf_ops)
+               return false;
+       return true;
+}
+
+/*
+ * Work out where the next byte written through @i would land.
+ *
+ * The current slot and offset are reported unchanged when the offset is
+ * zero, or when the slot is one of our own allocated pages that still has
+ * room.  Otherwise (slot not allocated by us, or offset == PAGE_SIZE) the
+ * answer is the start of the following slot.  Results go to *@idxp/*@offp.
+ */
+static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
+{
+       int slot = i->idx;
+       size_t pos = i->iov_offset;
+
+       if (pos) {
+               bool reusable = allocated(&i->pipe->bufs[slot]) &&
+                               pos != PAGE_SIZE;
+
+               if (!reusable) {
+                       slot = next_idx(slot, i->pipe);
+                       pos = 0;
+               }
+       }
+
+       *offp = pos;
+       *idxp = slot;
+}
+
+/*
+ * Make room in the pipe behind @i for up to @size bytes of new data.
+ *
+ * @size is clamped to i->count.  Space is taken first from the tail of the
+ * current buffer (when data_start() says it can be extended) and then from
+ * freshly allocated pages, one pipe slot each.  *@idxp and *@offp receive
+ * the slot and offset at which the new data begins.
+ *
+ * Returns the number of bytes of room actually obtained, which is smaller
+ * than requested when a page allocation fails or the ring wraps around onto
+ * the oldest in-use buffer.  Only the pipe is modified here; the iterator's
+ * idx/iov_offset/count are left for the caller to update.
+ */
+static size_t push_pipe(struct iov_iter *i, size_t size,
+                       int *idxp, size_t *offp)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       size_t off;
+       int idx;
+       ssize_t left;   /* signed: may go negative after the partial page */
+
+       if (unlikely(size > i->count))
+               size = i->count;
+       if (unlikely(!size))
+               return 0;
+
+       left = size;
+       data_start(i, &idx, &off);
+       *idxp = idx;
+       *offp = off;
+       if (off) {
+               /* use up what remains of the partially filled page first */
+               left -= PAGE_SIZE - off;
+               if (left <= 0) {
+                       /* the whole request fits in the current page */
+                       pipe->bufs[idx].len += size;
+                       return size;
+               }
+               pipe->bufs[idx].len = PAGE_SIZE;
+               idx = next_idx(idx, pipe);
+       }
+       /* keep going until we wrap onto curbuf; an empty pipe has no such limit yet */
+       while (idx != pipe->curbuf || !pipe->nrbufs) {
+               struct page *page = alloc_page(GFP_USER);
+               if (!page)
+                       break;
+               pipe->nrbufs++;
+               pipe->bufs[idx].ops = &default_pipe_buf_ops;
+               pipe->bufs[idx].page = page;
+               pipe->bufs[idx].offset = 0;
+               if (left <= PAGE_SIZE) {
+                       /* last page needed: request fully satisfied */
+                       pipe->bufs[idx].len = left;
+                       return size;
+               }
+               pipe->bufs[idx].len = PAGE_SIZE;
+               left -= PAGE_SIZE;
+               idx = next_idx(idx, pipe);
+       }
+       /* ran out of slots or memory: report only what was obtained */
+       return size - left;
+}
+
+/*
+ * Copy up to @bytes from the kernel buffer @addr into the pipe behind @i.
+ *
+ * Room is obtained with push_pipe(), then filled page by page via
+ * memcpy_to_page().  The iterator position is moved to the end of the
+ * copied data and i->count is reduced accordingly.
+ *
+ * Returns the number of bytes copied; this is whatever push_pipe() could
+ * provide, and 0 if the sanity check fails.
+ */
+static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
+                               struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       size_t n, off;
+       int idx;
+
+       if (!sanity(i))
+               return 0;
+
+       /* bytes becomes the amount of room actually obtained */
+       bytes = n = push_pipe(i, bytes, &idx, &off);
+       if (unlikely(!n))
+               return 0;
+       /* only the first page can start at a non-zero offset */
+       for ( ; n; idx = next_idx(idx, pipe), off = 0) {
+               size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+               memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
+               /* track the position as we go; the last pass leaves the final one */
+               i->idx = idx;
+               i->iov_offset = off + chunk;
+               n -= chunk;
+               addr += chunk;
+       }
+       i->count -= bytes;
+       return bytes;
+}
+
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
        const char *from = addr;
+       if (unlikely(i->type & ITER_PIPE))
+               return copy_pipe_to_iter(addr, bytes, i);
        iterate_and_advance(i, bytes, v,
                __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
                               v.iov_len),
@@ -373,6 +552,10 @@ EXPORT_SYMBOL(copy_to_iter);
 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        iterate_and_advance(i, bytes, v,
                __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
                                 v.iov_len),
@@ -388,6 +571,10 @@ EXPORT_SYMBOL(copy_from_iter);
 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
        char *to = addr;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
@@ -408,14 +595,20 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
-       } else
+       } else if (likely(!(i->type & ITER_PIPE)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
+       else
+               return copy_page_to_iter_pipe(page, offset, bytes, i);
 }
 EXPORT_SYMBOL(copy_page_to_iter);
 
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
 {
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
@@ -426,8 +619,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 }
 EXPORT_SYMBOL(copy_page_from_iter);
 
+/*
+ * Append up to @bytes of zeroes to the pipe behind @i.
+ *
+ * Room is obtained with push_pipe() and cleared page by page with
+ * memzero_page().  The iterator position is moved past the zeroed range
+ * and i->count is reduced accordingly.
+ *
+ * Returns the number of bytes zeroed; this is whatever push_pipe() could
+ * provide, and 0 if the sanity check fails.
+ */
+static size_t pipe_zero(size_t bytes, struct iov_iter *i)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       size_t n, off;
+       int idx;
+
+       if (!sanity(i))
+               return 0;
+
+       /* bytes becomes the amount of room actually obtained */
+       bytes = n = push_pipe(i, bytes, &idx, &off);
+       if (unlikely(!n))
+               return 0;
+
+       /* only the first page can start at a non-zero offset */
+       for ( ; n; idx = next_idx(idx, pipe), off = 0) {
+               size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+               memzero_page(pipe->bufs[idx].page, off, chunk);
+               /* track the position as we go; the last pass leaves the final one */
+               i->idx = idx;
+               i->iov_offset = off + chunk;
+               n -= chunk;
+       }
+       i->count -= bytes;
+       return bytes;
+}
+
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
+       if (unlikely(i->type & ITER_PIPE))
+               return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                __clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@ -442,6 +661,11 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
 {
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
+       if (unlikely(i->type & ITER_PIPE)) {
+               kunmap_atomic(kaddr);
+               WARN_ON(1);
+               return 0;
+       }
        iterate_all_kinds(i, bytes, v,
                __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
                                          v.iov_base, v.iov_len),
@@ -454,8 +678,49 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
+/*
+ * Advance a pipe-backed iterator by @size bytes (clamped to i->count).
+ *
+ * The walk starts at the iterator's current position and steps over the
+ * buffers that follow it.  The buffer in which the new position falls is
+ * trimmed to end exactly there, and every in-use buffer beyond it is
+ * released, so the pipe ends where the iterator does.  A call with
+ * @size == 0 therefore just discards everything past the current position.
+ *
+ * i->count is reduced by the number of bytes advanced, in line with the
+ * other pipe helpers in this file, which all charge what they consume to
+ * i->count.
+ *
+ * NOTE(review): if every slot of the ring is in use and the first slot to
+ * free is curbuf itself, idx == unused on entry to the release loop and
+ * nothing is released -- confirm whether callers can reach that state.
+ */
+static void pipe_advance(struct iov_iter *i, size_t size)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       struct pipe_buffer *buf;
+       int idx = i->idx;
+       size_t off = i->iov_offset;
+       size_t advanced;
+
+       if (unlikely(i->count < size))
+               size = i->count;
+       /* remember the real distance; size is rebased below */
+       advanced = size;
+
+       if (size) {
+               if (off) /* make it relative to the beginning of buffer */
+                       size += off - pipe->bufs[idx].offset;
+               while (1) {
+                       buf = &pipe->bufs[idx];
+                       if (size <= buf->len)
+                               break;
+                       size -= buf->len;
+                       idx = next_idx(idx, pipe);
+               }
+               /* trim the buffer we stopped in to end at the new position */
+               buf->len = size;
+               i->idx = idx;
+               off = i->iov_offset = buf->offset + size;
+       }
+       /* a partially used buffer is kept; freeing starts after it */
+       if (off)
+               idx = next_idx(idx, pipe);
+       if (pipe->nrbufs) {
+               int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+               /* [curbuf,unused) is in use.  Free [idx,unused) */
+               while (idx != unused) {
+                       pipe_buf_release(pipe, &pipe->bufs[idx]);
+                       idx = next_idx(idx, pipe);
+                       pipe->nrbufs--;
+               }
+       }
+       i->count -= advanced;
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
+       /* pipe-backed iterators have their own advance routine and bypass the generic macro */
+       if (unlikely(i->type & ITER_PIPE)) {
+               pipe_advance(i, size);
+               return;
+       }
        iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -465,6 +730,8 @@ EXPORT_SYMBOL(iov_iter_advance);
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
+       if (unlikely(i->type & ITER_PIPE))
+               return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
        else if (i->type & ITER_BVEC)
@@ -500,6 +767,19 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);
 
+/*
+ * Initialise @i as a pipe-backed iterator over @pipe with room for @count
+ * bytes.  @direction must be ITER_PIPE (anything else is a BUG).  The
+ * iterator starts at the slot right after the pipe's last in-use buffer,
+ * with a zero offset.
+ */
+void iov_iter_pipe(struct iov_iter *i, int direction,
+                       struct pipe_inode_info *pipe,
+                       size_t count)
+{
+       int first_free;
+
+       BUG_ON(direction != ITER_PIPE);
+
+       /* slot just past [curbuf, curbuf + nrbufs), wrapped into the ring */
+       first_free = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+
+       i->type = direction;
+       i->pipe = pipe;
+       i->idx = first_free;
+       i->iov_offset = 0;
+       i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_pipe);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
        unsigned long res = 0;
@@ -508,6 +788,11 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
        if (!size)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE)) {
+               if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
+                       return size | i->iov_offset;
+               return size;
+       }
        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len,
@@ -524,6 +809,11 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
        if (!size)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return ~0U;
+       }
+
        iterate_all_kinds(i, size, v,
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0), 0),
@@ -536,6 +826,47 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/*
+ * Reserve up to @maxsize bytes of room in the pipe behind @i and hand the
+ * backing pages to the caller.
+ *
+ * @idx is the slot where the data is expected to start; push_pipe()
+ * overwrites it and *@start with the actual starting slot and offset.
+ * Every page that the reserved range touches -- including a trailing,
+ * partially covered one -- is stored in @pages with an extra reference
+ * taken via get_page().
+ *
+ * Returns the number of bytes reserved, or -EFAULT when no room could be
+ * obtained.  The return type is signed so that the error code is not
+ * mistaken for a huge byte count.  The iterator position is not moved.
+ */
+static inline ssize_t __pipe_get_pages(struct iov_iter *i,
+                               size_t maxsize,
+                               struct page **pages,
+                               int idx,
+                               size_t *start)
+{
+       struct pipe_inode_info *pipe = i->pipe;
+       size_t n = push_pipe(i, maxsize, &idx, start);
+       size_t npages;
+
+       if (!n)
+               return -EFAULT;
+
+       /* round up so the last, partially covered page is included too */
+       npages = DIV_ROUND_UP(n + *start, PAGE_SIZE);
+       while (npages--) {
+               get_page(*pages++ = pipe->bufs[idx].page);
+               idx = next_idx(idx, pipe);
+       }
+
+       return n;
+}
+
+/*
+ * iov_iter_get_pages() backend for pipe-backed iterators.
+ *
+ * Limits the request to what the remaining ring slots and @maxpages pages
+ * can hold, then lets __pipe_get_pages() reserve the room and fill @pages.
+ * *@start receives the offset of the data within the first page.
+ *
+ * Returns what __pipe_get_pages() returns, or -EFAULT if the sanity check
+ * fails.
+ */
+static ssize_t pipe_get_pages(struct iov_iter *i,
+                  struct page **pages, size_t maxsize, unsigned maxpages,
+                  size_t *start)
+{
+       struct pipe_inode_info *pipe;
+       unsigned slots;
+       size_t limit;
+       int first;
+
+       if (!sanity(i))
+               return -EFAULT;
+
+       pipe = i->pipe;
+       data_start(i, &first, start);
+
+       /* some of this one + all after this one */
+       slots = ((pipe->curbuf - first - 1) & (pipe->buffers - 1)) + 1;
+       if (slots > maxpages)
+               slots = maxpages;
+
+       /* bytes those pages can hold, minus the part already used */
+       limit = slots * PAGE_SIZE - *start;
+       if (limit > maxsize)
+               limit = maxsize;
+
+       return __pipe_get_pages(i, limit, pages, first, start);
+}
+
 ssize_t iov_iter_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
@@ -546,6 +877,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
        if (!maxsize)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE))
+               return pipe_get_pages(i, pages, maxsize, maxpages, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -581,6 +914,37 @@ static struct page **get_pages_array(size_t n)
        return p;
 }
 
+/*
+ * iov_iter_get_pages_alloc() backend for pipe-backed iterators.
+ *
+ * Sizes a page-pointer array for the request (bounded by the ring slots
+ * still available), allocates it with get_pages_array() and lets
+ * __pipe_get_pages() reserve the room and fill it.  *@start receives the
+ * offset of the data within the first page.
+ *
+ * On success returns the number of bytes reserved and stores the array in
+ * *@pages.  On failure returns a negative errno (-EFAULT from the sanity
+ * check or __pipe_get_pages(), -ENOMEM if the array cannot be allocated),
+ * frees the array and leaves *@pages untouched.
+ */
+static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
+                  struct page ***pages, size_t maxsize,
+                  size_t *start)
+{
+       struct page **p;
+       size_t capacity;
+       ssize_t n;      /* signed so that an error return is seen as <= 0 */
+       int idx;
+       int npages;
+
+       if (!sanity(i))
+               return -EFAULT;
+
+       data_start(i, &idx, start);
+       /* some of this one + all after this one */
+       npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
+       capacity = npages * PAGE_SIZE - *start;
+       if (maxsize > capacity)
+               maxsize = capacity;
+       else
+               npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
+       p = get_pages_array(npages);
+       if (!p)
+               return -ENOMEM;
+       n = __pipe_get_pages(i, maxsize, p, idx, start);
+       if (n > 0)
+               *pages = p;
+       else
+               kvfree(p);
+       return n;
+}
+
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
@@ -593,6 +957,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
        if (!maxsize)
                return 0;
 
+       if (unlikely(i->type & ITER_PIPE))
+               return pipe_get_pages_alloc(i, pages, maxsize, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -634,6 +1000,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return 0;
+       }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_from_user(v.iov_base, 
@@ -672,6 +1042,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
+       if (unlikely(i->type & ITER_PIPE)) {
+               WARN_ON(1);     /* for now */
+               return 0;
+       }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@ -711,7 +1085,20 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
        if (!size)
                return 0;
 
-       iterate_all_kinds(i, size, v, ({
+       if (unlikely(i->type & ITER_PIPE)) {
+               struct pipe_inode_info *pipe = i->pipe;
+               size_t off;
+               int idx;
+
+               if (!sanity(i))
+                       return 0;
+
+               data_start(i, &idx, &off);
+               /* some of this one + all after this one */
+               npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
+               if (npages >= maxpages)
+                       return maxpages;
+       } else iterate_all_kinds(i, size, v, ({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
@@ -736,6 +1123,10 @@ EXPORT_SYMBOL(iov_iter_npages);
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
        *new = *old;
+       if (unlikely(new->type & ITER_PIPE)) {
+               WARN_ON(1);
+               return NULL;
+       }
        if (new->type & ITER_BVEC)
                return new->bvec = kmemdup(new->bvec,
                                    new->nr_segs * sizeof(struct bio_vec),