diff --git a/fs/aio.c b/fs/aio.c
index a2f92aa..062a5f6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
 #include <linux/compat.h>
-#include <linux/anon_inodes.h>
 #include <linux/migrate.h>
 #include <linux/ramfs.h>
 #include <linux/percpu-refcount.h>
+#include <linux/mount.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -153,12 +153,67 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
 static struct kmem_cache       *kiocb_cachep;
 static struct kmem_cache       *kioctx_cachep;
 
+static struct vfsmount *aio_mnt;
+
+static const struct file_operations aio_ring_fops;
+static const struct address_space_operations aio_ctx_aops;
+
+static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
+{
+       struct qstr this = QSTR_INIT("[aio]", 5);
+       struct file *file;
+       struct path path;
+       struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
+       if (IS_ERR(inode))
+               return ERR_CAST(inode);
+
+       inode->i_mapping->a_ops = &aio_ctx_aops;
+       inode->i_mapping->private_data = ctx;
+       inode->i_size = PAGE_SIZE * nr_pages;
+
+       path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
+       if (!path.dentry) {
+               iput(inode);
+               return ERR_PTR(-ENOMEM);
+       }
+       path.mnt = mntget(aio_mnt);
+
+       d_instantiate(path.dentry, inode);
+       file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
+       if (IS_ERR(file)) {
+               path_put(&path);
+               return file;
+       }
+
+       file->f_flags = O_RDWR;
+       file->private_data = ctx;
+       return file;
+}
+
+static struct dentry *aio_mount(struct file_system_type *fs_type,
+                               int flags, const char *dev_name, void *data)
+{
+       static const struct dentry_operations ops = {
+               .d_dname        = simple_dname,
+       };
+       return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+}
+
 /* aio_setup
  *     Creates the slab caches used by the aio routines, panic on
  *     failure as this is done early during the boot sequence.
  */
 static int __init aio_setup(void)
 {
+       static struct file_system_type aio_fs = {
+               .name           = "aio",
+               .mount          = aio_mount,
+               .kill_sb        = kill_anon_super,
+       };
+       aio_mnt = kern_mount(&aio_fs);
+       if (IS_ERR(aio_mnt))
+               panic("Failed to create aio fs mount.");
+
        kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
        kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
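
This hunk replaces the shared anon_inodes inode with a dedicated "aio"
pseudo-filesystem: kern_mount() pins one kernel-internal mount at boot, and
aio_private_file() then gives every kioctx its own inode, so each ring buffer
has a private address_space for the migration callbacks further down. A
minimal sketch of the same pseudo-filesystem pattern, with hypothetical
"example" names and magic value (the five-argument mount_pseudo(),
simple_dname(), kern_mount() and kill_anon_super() all match this kernel era):

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/err.h>

static struct vfsmount *example_mnt;

static struct dentry *example_mount(struct file_system_type *fs_type,
				    int flags, const char *dev_name,
				    void *data)
{
	static const struct dentry_operations ops = {
		.d_dname	= simple_dname,	/* maps show "example:[...]" */
	};
	return mount_pseudo(fs_type, "example:", NULL, &ops, 0x4558414d);
}

static struct file_system_type example_fs = {
	.name		= "example",
	.mount		= example_mount,
	.kill_sb	= kill_anon_super,
};

static int __init example_init(void)
{
	/* kern_mount() pins an internal mount; nothing is user-mountable */
	example_mnt = kern_mount(&example_fs);
	if (IS_ERR(example_mnt))
		return PTR_ERR(example_mnt);
	return 0;
}
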
@@ -189,9 +244,14 @@ static void aio_free_ring(struct kioctx *ctx)
        int i;
 
        for (i = 0; i < ctx->nr_pages; i++) {
-               pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
-                               page_count(ctx->ring_pages[i]));
-               put_page(ctx->ring_pages[i]);
+               struct page *page = ctx->ring_pages[i];
+
+               if (!page)
+                       continue;
+               pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
+                               page_count(page));
+               ctx->ring_pages[i] = NULL;
+               put_page(page);
        }
 
        put_aio_ring_file(ctx);
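
aio_free_ring() now tolerates a partially populated ring (the reworked
aio_setup_ring() below can fail halfway through its allocation loop) and
unpublishes each slot before dropping the reference. The idiom, as a
hypothetical helper:

/*
 * Unpublish the slot before dropping the reference, so a concurrent
 * reader that inspects ring_pages[] under the proper locks sees NULL
 * rather than a page that is about to be freed.
 */
static void release_ring_slot(struct page **slot)
{
	struct page *page = *slot;

	if (!page)		/* slot never populated: setup failed early */
		return;
	*slot = NULL;		/* unpublish first ... */
	put_page(page);		/* ... then drop the reference */
}
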
@@ -225,18 +285,38 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
        unsigned long flags;
        int rc;
 
+       rc = 0;
+
+       /* Make sure the old page hasn't already been changed */
+       spin_lock(&mapping->private_lock);
+       ctx = mapping->private_data;
+       if (ctx) {
+               pgoff_t idx;
+               spin_lock_irqsave(&ctx->completion_lock, flags);
+               idx = old->index;
+               if (idx < (pgoff_t)ctx->nr_pages) {
+                       if (ctx->ring_pages[idx] != old)
+                               rc = -EAGAIN;
+               } else
+                       rc = -EINVAL;
+               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+       } else
+               rc = -EINVAL;
+       spin_unlock(&mapping->private_lock);
+
+       if (rc != 0)
+               return rc;
+
        /* Writeback must be complete */
        BUG_ON(PageWriteback(old));
-       put_page(old);
+       get_page(new);
 
-       rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+       rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
        if (rc != MIGRATEPAGE_SUCCESS) {
-               get_page(old);
+               put_page(new);
                return rc;
        }
 
-       get_page(new);
-
        /* We can potentially race against kioctx teardown here.  Use the
         * address_space's private data lock to protect the mapping's
         * private_data.
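
Before doing any work, the callback now verifies under both
mapping->private_lock and ctx->completion_lock that the ring still owns the
old page; the next hunk repeats the same check when the new page is actually
installed. A hypothetical helper condensing that locked check:

static int ring_slot_replace(struct kioctx *ctx, struct page *old,
			     struct page *new)
{
	pgoff_t idx = old->index;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&ctx->completion_lock, flags);
	if (idx >= (pgoff_t)ctx->nr_pages)
		rc = -EINVAL;		/* index out of range for this ring */
	else if (ctx->ring_pages[idx] != old)
		rc = -EAGAIN;		/* page changed under us: retry */
	else
		ctx->ring_pages[idx] = new;
	spin_unlock_irqrestore(&ctx->completion_lock, flags);
	return rc;
}
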
@@ -248,13 +328,24 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
                spin_lock_irqsave(&ctx->completion_lock, flags);
                migrate_page_copy(new, old);
                idx = old->index;
-               if (idx < (pgoff_t)ctx->nr_pages)
-                       ctx->ring_pages[idx] = new;
+               if (idx < (pgoff_t)ctx->nr_pages) {
+                       /* And only do the move if things haven't changed */
+                       if (ctx->ring_pages[idx] == old)
+                               ctx->ring_pages[idx] = new;
+                       else
+                               rc = -EAGAIN;
+               } else
+                       rc = -EINVAL;
                spin_unlock_irqrestore(&ctx->completion_lock, flags);
        } else
                rc = -EBUSY;
        spin_unlock(&mapping->private_lock);
 
+       if (rc == MIGRATEPAGE_SUCCESS)
+               put_page(old);
+       else
+               put_page(new);
+
        return rc;
 }
 #endif
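
The page reference counting is reworked to match: the ring's reference on the
new page is taken before the mapping move (the added trailing argument tells
migrate_page_move_mapping() about the ring's extra reference on the old
page), and whichever page loses is released at the end. The flow, as a
comment-form sketch:

/*
 *	get_page(new);			  ring's reference, taken up front
 *	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
 *					  the final 1 = ring's ref on old
 *	if (rc != MIGRATEPAGE_SUCCESS)
 *		put_page(new);		  move failed: undo
 *	... on success, install new in ring_pages[] under the locks ...
 *	if (rc == MIGRATEPAGE_SUCCESS)
 *		put_page(old);		  the ring no longer owns old
 *	else
 *		put_page(new);		  install raced and failed: undo
 */
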
@@ -271,7 +362,7 @@ static int aio_setup_ring(struct kioctx *ctx)
        struct aio_ring *ring;
        unsigned nr_events = ctx->max_reqs;
        struct mm_struct *mm = current->mm;
-       unsigned long size, populate;
+       unsigned long size, unused;
        int nr_pages;
        int i;
        struct file *file;
@@ -286,15 +377,25 @@ static int aio_setup_ring(struct kioctx *ctx)
        if (nr_pages < 0)
                return -EINVAL;
 
-       file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+       file = aio_private_file(ctx, nr_pages);
        if (IS_ERR(file)) {
                ctx->aio_ring_file = NULL;
                return -EAGAIN;
        }
 
-       file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
-       file->f_inode->i_mapping->private_data = ctx;
-       file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
+       ctx->aio_ring_file = file;
+       nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+                       / sizeof(struct io_event);
+
+       ctx->ring_pages = ctx->internal_pages;
+       if (nr_pages > AIO_RING_PAGES) {
+               ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+                                         GFP_KERNEL);
+               if (!ctx->ring_pages) {
+                       put_aio_ring_file(ctx);
+                       return -ENOMEM;
+               }
+       }
 
        for (i = 0; i < nr_pages; i++) {
                struct page *page;
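
The event-count computation and the ring_pages[] allocation now run before
any pages exist, so every later failure can simply unwind through
aio_free_ring(). A worked instance of the sizing math, assuming 4 KiB pages
and the usual 32-byte struct aio_ring header and 32-byte struct io_event:

/*
 *	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
 *			/ sizeof(struct io_event)
 *		  = (4096 * nr_pages - 32) / 32
 *		  = 128 * nr_pages - 1
 *
 * so a one-page ring holds 127 events, a two-page ring 255, and so on.
 */
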
@@ -307,19 +408,14 @@ static int aio_setup_ring(struct kioctx *ctx)
                SetPageUptodate(page);
                SetPageDirty(page);
                unlock_page(page);
+
+               ctx->ring_pages[i] = page;
        }
-       ctx->aio_ring_file = file;
-       nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
-                       / sizeof(struct io_event);
+       ctx->nr_pages = i;
 
-       ctx->ring_pages = ctx->internal_pages;
-       if (nr_pages > AIO_RING_PAGES) {
-               ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
-                                         GFP_KERNEL);
-               if (!ctx->ring_pages) {
-                       put_aio_ring_file(ctx);
-                       return -ENOMEM;
-               }
+       if (unlikely(i != nr_pages)) {
+               aio_free_ring(ctx);
+               return -EAGAIN;
        }
 
        ctx->mmap_size = nr_pages * PAGE_SIZE;
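
Each ring page now lives in the ring file's page cache from the moment it is
created (the find_or_create_page() call sits in context elided between these
hunks), and ctx->nr_pages counts only what was actually populated, so a
partial failure can hand straight off to aio_free_ring(). A sketch of the
per-page step; the helper name is hypothetical and the GFP flags are an
assumption based on the surrounding code:

#include <linux/pagemap.h>

static int fill_ring_page(struct address_space *mapping, pgoff_t idx,
			  struct page **slot)
{
	struct page *page = find_or_create_page(mapping, idx,
						GFP_HIGHUSER | __GFP_ZERO);
	if (!page)
		return -ENOMEM;
	SetPageUptodate(page);	/* the zero-filled contents are valid */
	SetPageDirty(page);	/* never clean, so reclaim leaves it alone */
	unlock_page(page);	/* find_or_create_page() returned it locked */
	*slot = page;		/* this reference is what ring_pages[] keeps */
	return 0;
}
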
@@ -328,9 +424,9 @@ static int aio_setup_ring(struct kioctx *ctx)
        down_write(&mm->mmap_sem);
        ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
                                       PROT_READ | PROT_WRITE,
-                                      MAP_SHARED | MAP_POPULATE, 0, &populate);
+                                      MAP_SHARED, 0, &unused);
+       up_write(&mm->mmap_sem);
        if (IS_ERR((void *)ctx->mmap_base)) {
-               up_write(&mm->mmap_sem);
                ctx->mmap_size = 0;
                aio_free_ring(ctx);
                return -EAGAIN;
@@ -338,27 +434,6 @@ static int aio_setup_ring(struct kioctx *ctx)
 
        pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
 
-       /* We must do this while still holding mmap_sem for write, as we
-        * need to be protected against userspace attempting to mremap()
-        * or munmap() the ring buffer.
-        */
-       ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
-                                      1, 0, ctx->ring_pages, NULL);
-
-       /* Dropping the reference here is safe as the page cache will hold
-        * onto the pages for us.  It is also required so that page migration
-        * can unmap the pages and get the right reference count.
-        */
-       for (i = 0; i < ctx->nr_pages; i++)
-               put_page(ctx->ring_pages[i]);
-
-       up_write(&mm->mmap_sem);
-
-       if (unlikely(ctx->nr_pages != nr_pages)) {
-               aio_free_ring(ctx);
-               return -EAGAIN;
-       }
-
        ctx->user_id = ctx->mmap_base;
        ctx->nr_events = nr_events; /* trusted copy */
 
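
With the pages owned through the page cache since setup, the old MAP_POPULATE
plus get_user_pages() dance is gone and mmap_sem can be dropped as soon as
do_mmap_pgoff() returns. The ownership change, in comment form:

/*
 * before:	do_mmap_pgoff(..., MAP_POPULATE) faults the ring in, then
 *		get_user_pages() fills ring_pages[] and the extra references
 *		are dropped again so page migration can work;
 * after:	find_or_create_page() already gave ring_pages[] its own
 *		reference at creation time, so no GUP pass is needed and no
 *		work is left that must hold mmap_sem for write.
 */
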
@@ -601,7 +676,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        aio_nr += ctx->max_reqs;
        spin_unlock(&aio_nr_lock);
 
-       percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+       percpu_ref_get(&ctx->users);    /* io_setup() will drop this ref */
+       percpu_ref_get(&ctx->reqs);     /* free_ioctx_users() will drop this */
 
        err = ioctx_add_table(ctx, mm);
        if (err)
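
ioctx_alloc() now also pins ctx->reqs so the context cannot be freed while
requests may still complete; free_ioctx_users() drops that reference during
teardown. A minimal sketch of the percpu-ref pairing, with hypothetical names
(the two-argument percpu_ref_init() matches this kernel era; later kernels
add gfp/flags arguments):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/percpu-refcount.h>

struct myctx {
	struct percpu_ref users;
};

static void myctx_free(struct percpu_ref *ref)
{
	kfree(container_of(ref, struct myctx, users));
}

static struct myctx *myctx_alloc(void)
{
	struct myctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (!ctx)
		return NULL;
	/* the ref starts at 1; that base reference belongs to teardown */
	if (percpu_ref_init(&ctx->users, myctx_free)) {
		kfree(ctx);
		return NULL;
	}
	percpu_ref_get(&ctx->users);	/* caller's ref, like io_setup()'s */
	return ctx;
}

Teardown pairs a percpu_ref_put() with the get above, and percpu_ref_kill()
drops the base reference; myctx_free() runs once the count reaches zero.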