KVM: MMU: try to fix up page faults before giving up
authorPaolo Bonzini <pbonzini@redhat.com>
Tue, 7 Jun 2016 15:51:18 +0000 (17:51 +0200)
committerPaolo Bonzini <pbonzini@redhat.com>
Tue, 5 Jul 2016 12:41:26 +0000 (14:41 +0200)
The vGPU folks would like to trap the first access to a BAR by setting
vm_ops on the VMAs produced by mmap-ing a VFIO device.  The fault handler
then can use remap_pfn_range to place some non-reserved pages in the VMA.

This kind of VM_PFNMAP mapping is not handled by KVM, but follow_pfn
and fixup_user_fault together help supporting it.  The patch also supports
VM_MIXEDMAP vmas where the pfns are not reserved and thus subject to
reference counting.

Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Tested-by: Neo Jia <cjia@nvidia.com>
Reported-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
mm/gup.c
virt/kvm/kvm_main.c

index c057784..e3ac22f 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -720,6 +720,7 @@ retry:
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(fixup_user_fault);
 
 static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
                                                struct mm_struct *mm,
index 5aae59e..154b9ab 100644 (file)
@@ -1446,9 +1446,45 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
                               unsigned long addr, bool *async,
                               bool write_fault, kvm_pfn_t *p_pfn)
 {
-       *p_pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
-               vma->vm_pgoff;
-       BUG_ON(!kvm_is_reserved_pfn(*p_pfn));
+       unsigned long pfn;
+       int r;
+
+       r = follow_pfn(vma, addr, &pfn);
+       if (r) {
+               /*
+                * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
+                * not call the fault handler, so do it here.
+                */
+               bool unlocked = false;
+               r = fixup_user_fault(current, current->mm, addr,
+                                    (write_fault ? FAULT_FLAG_WRITE : 0),
+                                    &unlocked);
+               if (unlocked)
+                       return -EAGAIN;
+               if (r)
+                       return r;
+
+               r = follow_pfn(vma, addr, &pfn);
+               if (r)
+                       return r;
+
+       }
+
+
+       /*
+        * Get a reference here because callers of *hva_to_pfn* and
+        * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
+        * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
+        * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+        * simply do nothing for reserved pfns.
+        *
+        * Whoever called remap_pfn_range is also going to call e.g.
+        * unmap_mapping_range before the underlying pages are freed,
+        * causing a call to our MMU notifier.
+        */ 
+       kvm_get_pfn(pfn);
+
+       *p_pfn = pfn;
        return 0;
 }
 
@@ -1493,12 +1529,15 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
                goto exit;
        }
 
+retry:
        vma = find_vma_intersection(current->mm, addr, addr + 1);
 
        if (vma == NULL)
                pfn = KVM_PFN_ERR_FAULT;
        else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
                r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+               if (r == -EAGAIN)
+                       goto retry;
                if (r < 0)
                        pfn = KVM_PFN_ERR_FAULT;
        } else {