dax: fix race between simultaneous faults

author Matthew Wilcox <willy@linux.intel.com>

Tue, 8 Sep 2015 21:59:25 +0000 (14:59 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 8 Sep 2015 22:35:28 +0000 (15:35 -0700)
author Matthew Wilcox <willy@linux.intel.com>
Tue, 8 Sep 2015 21:59:25 +0000 (14:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 8 Sep 2015 22:35:28 +0000 (15:35 -0700)
diff --git a/fs/dax.c b/fs/dax.c

index c694117..9593f4b 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -272,7 +272,6 @@ static int copy_user_bh(struct page *to, struct buffer_head *bh,
  static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                         struct vm_area_struct *vma, struct vm_fault *vmf)
  {
-       struct address_space *mapping = inode->i_mapping;
         sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
         unsigned long vaddr = (unsigned long)vmf->virtual_address;
         void *addr;
@@ -280,8 +279,6 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
         pgoff_t size;
         int error;
  
-       i_mmap_lock_read(mapping);
-
         /*
          * Check truncate didn't happen while we were allocating a block.
          * If it did, this block may or may not be still allocated to the
@@ -309,8 +306,6 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
         error = vm_insert_mixed(vma, vaddr, pfn);
  
   out:
-       i_mmap_unlock_read(mapping);
-
         return error;
  }
  
@@ -372,15 +367,17 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                          * from a read fault and we've raced with a truncate
                          */
                         error = -EIO;
-                       goto unlock_page;
+                       goto unlock;
                 }
+       } else {
+               i_mmap_lock_write(mapping);
         }
  
         error = get_block(inode, block, &bh, 0);
         if (!error && (bh.b_size < PAGE_SIZE))
                 error = -EIO;           /* fs corruption? */
         if (error)
-               goto unlock_page;
+               goto unlock;
  
         if (!buffer_mapped(&bh) && !buffer_unwritten(&bh) && !vmf->cow_page) {
                 if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -391,8 +388,9 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                         if (!error && (bh.b_size < PAGE_SIZE))
                                 error = -EIO;
                         if (error)
-                               goto unlock_page;
+                               goto unlock;
                 } else {
+                       i_mmap_unlock_write(mapping);
                         return dax_load_hole(mapping, page, vmf);
                 }
         }
@@ -404,17 +402,15 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 else
                         clear_user_highpage(new_page, vaddr);
                 if (error)
-                       goto unlock_page;
+                       goto unlock;
                 vmf->page = page;
                 if (!page) {
-                       i_mmap_lock_read(mapping);
                         /* Check we didn't race with truncate */
                         size = (i_size_read(inode) + PAGE_SIZE - 1) >>
                                                                 PAGE_SHIFT;
                         if (vmf->pgoff >= size) {
-                               i_mmap_unlock_read(mapping);
                                 error = -EIO;
-                               goto out;
+                               goto unlock;
                         }
                 }
                 return VM_FAULT_LOCKED;
@@ -450,6 +446,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                         WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
         }
  
+       if (!page)
+               i_mmap_unlock_write(mapping);
   out:
         if (error == -ENOMEM)
                 return VM_FAULT_OOM | major;
@@ -458,11 +456,14 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                 return VM_FAULT_SIGBUS | major;
         return VM_FAULT_NOPAGE | major;
  
- unlock_page:
+ unlock:
         if (page) {
                 unlock_page(page);
                 page_cache_release(page);
+       } else {
+               i_mmap_unlock_write(mapping);
         }
+
         goto out;
  }
  EXPORT_SYMBOL(__dax_fault);
@@ -540,10 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
  
         bh.b_size = PMD_SIZE;
+       i_mmap_lock_write(mapping);
         length = get_block(inode, block, &bh, write);
         if (length)
                 return VM_FAULT_SIGBUS;
-       i_mmap_lock_read(mapping);
  
         /*
          * If the filesystem isn't willing to tell us the length of a hole,
@@ -607,11 +608,11 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         }
  
   out:
-       i_mmap_unlock_read(mapping);
-
         if (buffer_unwritten(&bh))
                 complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
  
+       i_mmap_unlock_write(mapping);
+
         return result;
  
   fallback:
diff --git a/mm/memory.c b/mm/memory.c

index a3f9a8c..320c42e 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2427,11 +2427,16 @@ void unmap_mapping_range(struct address_space *mapping,
                 details.last_index = ULONG_MAX;
  
  
-       /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
-       i_mmap_lock_write(mapping);
+       /*
+        * DAX already holds i_mmap_lock to serialise file truncate vs
+        * page fault and page fault vs page fault.
+        */
+       if (!IS_DAX(mapping->host))
+               i_mmap_lock_write(mapping);
         if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
                 unmap_mapping_range_tree(&mapping->i_mmap, &details);
-       i_mmap_unlock_write(mapping);
+       if (!IS_DAX(mapping->host))
+               i_mmap_unlock_write(mapping);
  }
  EXPORT_SYMBOL(unmap_mapping_range);
author	Matthew Wilcox <willy@linux.intel.com>
	Tue, 8 Sep 2015 21:59:25 +0000 (14:59 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 8 Sep 2015 22:35:28 +0000 (15:35 -0700)
fs/dax.c		patch | blob | history
mm/memory.c		patch | blob | history