dax: provide an iomap based fault handler
authorChristoph Hellwig <hch@lst.de>
Mon, 19 Sep 2016 01:24:50 +0000 (11:24 +1000)
committerDave Chinner <david@fromorbit.com>
Mon, 19 Sep 2016 01:24:50 +0000 (11:24 +1000)
Very similar to the existing dax_fault function, but instead of using
the get_block callback we rely on the iomap_ops vector from iomap.c.
That also avoids having to do two calls into the file system for write
faults.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
fs/dax.c
include/linux/dax.h

index 1f9f2d4..cc025f8 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1354,4 +1354,118 @@ iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
        return done ? done : ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ * @ops: iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files. Assumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+                       struct iomap_ops *ops)
+{
+       struct address_space *mapping = vma->vm_file->f_mapping;
+       struct inode *inode = mapping->host;
+       unsigned long vaddr = (unsigned long)vmf->virtual_address;
+       loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+       sector_t sector;
+       struct iomap iomap = { 0 };
+       unsigned flags = 0;
+       int error, major = 0;
+       void *entry;
+
+       /*
+        * Check whether offset isn't beyond end of file now. Caller is supposed
+        * to hold locks serializing us with truncate / punch hole so this is
+        * a reliable test.
+        */
+       if (pos >= i_size_read(inode))
+               return VM_FAULT_SIGBUS;
+
+       entry = grab_mapping_entry(mapping, vmf->pgoff);
+       if (IS_ERR(entry)) {
+               error = PTR_ERR(entry);
+               goto out;
+       }
+
+       if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+               flags |= IOMAP_WRITE;
+
+       /*
+        * Note that we don't bother to use iomap_apply here: DAX required
+        * the file system block size to be equal the page size, which means
+        * that we never have to deal with more than a single extent here.
+        */
+       error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+       if (error)
+               goto unlock_entry;
+       if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+               error = -EIO;           /* fs corruption? */
+               goto unlock_entry;
+       }
+
+       sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+       if (vmf->cow_page) {
+               switch (iomap.type) {
+               case IOMAP_HOLE:
+               case IOMAP_UNWRITTEN:
+                       clear_user_highpage(vmf->cow_page, vaddr);
+                       break;
+               case IOMAP_MAPPED:
+                       error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+                                       vmf->cow_page, vaddr);
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       error = -EIO;
+                       break;
+               }
+
+               if (error)
+                       goto unlock_entry;
+               if (!radix_tree_exceptional_entry(entry)) {
+                       vmf->page = entry;
+                       return VM_FAULT_LOCKED;
+               }
+               vmf->entry = entry;
+               return VM_FAULT_DAX_LOCKED;
+       }
+
+       switch (iomap.type) {
+       case IOMAP_MAPPED:
+               if (iomap.flags & IOMAP_F_NEW) {
+                       count_vm_event(PGMAJFAULT);
+                       mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+                       major = VM_FAULT_MAJOR;
+               }
+               error = dax_insert_mapping(mapping, iomap.bdev, sector,
+                               PAGE_SIZE, &entry, vma, vmf);
+               break;
+       case IOMAP_UNWRITTEN:
+       case IOMAP_HOLE:
+               if (!(vmf->flags & FAULT_FLAG_WRITE))
+                       return dax_load_hole(mapping, entry, vmf);
+               /*FALLTHRU*/
+       default:
+               WARN_ON_ONCE(1);
+               error = -EIO;
+               break;
+       }
+
+ unlock_entry:
+       put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+ out:
+       if (error == -ENOMEM)
+               return VM_FAULT_OOM | major;
+       /* -EBUSY is fine, somebody else faulted on the same PTE */
+       if (error < 0 && error != -EBUSY)
+               return VM_FAULT_SIGBUS | major;
+       return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
 #endif /* CONFIG_FS_IOMAP */
index a0595b4..add6c4b 100644 (file)
@@ -17,6 +17,8 @@ ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
                  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+                       struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,