diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 4a9d666..57d8c37 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -66,6 +66,7 @@ struct vfio_domain {
        struct list_head        next;
        struct list_head        group_list;
        int                     prot;           /* IOMMU_CACHE */
+       bool                    fgsp;           /* Fine-grained super pages */
 };
 
 struct vfio_dma {
@@ -264,6 +265,7 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
        unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        bool lock_cap = capable(CAP_IPC_LOCK);
        long ret, i;
+       bool rsvd;
 
        if (!current->mm)
                return -ENODEV;
@@ -272,10 +274,9 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
        if (ret)
                return ret;
 
-       if (is_invalid_reserved_pfn(*pfn_base))
-               return 1;
+       rsvd = is_invalid_reserved_pfn(*pfn_base);
 
-       if (!lock_cap && current->mm->locked_vm + 1 > limit) {
+       if (!rsvd && !lock_cap && current->mm->locked_vm + 1 > limit) {
                put_pfn(*pfn_base, prot);
                pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
                        limit << PAGE_SHIFT);
@@ -283,7 +284,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
        }
 
        if (unlikely(disable_hugepages)) {
-               vfio_lock_acct(1);
+               if (!rsvd)
+                       vfio_lock_acct(1);
                return 1;
        }
 
@@ -295,12 +297,14 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
                if (ret)
                        break;
 
-               if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
+               if (pfn != *pfn_base + i ||
+                   rsvd != is_invalid_reserved_pfn(pfn)) {
                        put_pfn(pfn, prot);
                        break;
                }
 
-               if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
+               if (!rsvd && !lock_cap &&
+                   current->mm->locked_vm + i + 1 > limit) {
                        put_pfn(pfn, prot);
                        pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
                                __func__, limit << PAGE_SHIFT);
@@ -308,7 +312,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
                }
        }
 
-       vfio_lock_acct(i);
+       if (!rsvd)
+               vfio_lock_acct(i);
 
        return i;
 }
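
The hunks above rework vfio_pin_pages() around one idea: reserved or invalid
PFNs (PCI BAR mappings, say, rather than real RAM) may be pinned but must
never be charged against RLIMIT_MEMLOCK. The new rsvd flag records whether
the base page is reserved, the loop cuts a chunk short as soon as a page's
reserved-ness differs from the base page's, and vfio_lock_acct() is skipped
entirely for reserved chunks. A minimal userspace model of that accounting
rule (every name below is illustrative, not kernel API):

    #include <stdbool.h>
    #include <stdio.h>

    #define LIMIT 2                  /* stand-in for RLIMIT_MEMLOCK, in pages */

    /* Toy pin loop: a chunk stops at a reserved/normal boundary, and
     * only normal pages are charged to locked_vm. */
    static long pin_pages(const bool *rsvd_map, long npage, long *locked_vm)
    {
        bool rsvd = rsvd_map[0];     /* reserved-ness of the whole chunk */
        long i;

        for (i = 1; i < npage; i++) {
            if (rsvd_map[i] != rsvd)
                break;               /* never mix reserved and normal pages */
            if (!rsvd && *locked_vm + i + 1 > LIMIT)
                break;               /* normal pages hit the rlimit */
        }
        if (!rsvd)
            *locked_vm += i;         /* reserved chunks are never charged */
        return i;
    }

    int main(void)
    {
        bool pages[] = { false, false, false, true, true };
        long locked_vm = 0;
        long pinned = pin_pages(pages, 5, &locked_vm);

        printf("pinned %ld, locked_vm %ld\n", pinned, locked_vm);
        return 0;
    }
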
@@ -346,12 +351,14 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
        domain = d = list_first_entry(&iommu->domain_list,
                                      struct vfio_domain, next);
 
-       list_for_each_entry_continue(d, &iommu->domain_list, next)
+       list_for_each_entry_continue(d, &iommu->domain_list, next) {
                iommu_unmap(d->domain, dma->iova, dma->size);
+               cond_resched();
+       }
 
        while (iova < end) {
-               size_t unmapped;
-               phys_addr_t phys;
+               size_t unmapped, len;
+               phys_addr_t phys, next;
 
                phys = iommu_iova_to_phys(domain->domain, iova);
                if (WARN_ON(!phys)) {
@@ -359,7 +366,19 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
                        continue;
                }
 
-               unmapped = iommu_unmap(domain->domain, iova, PAGE_SIZE);
+               /*
+                * To optimize for fewer iommu_unmap() calls, each of which
+                * may require hardware cache flushing, try to find the
+                * largest contiguous physical memory chunk to unmap.
+                */
+               for (len = PAGE_SIZE;
+                    !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+                       next = iommu_iova_to_phys(domain->domain, iova + len);
+                       if (next != phys + len)
+                               break;
+               }
+
+               unmapped = iommu_unmap(domain->domain, iova, len);
                if (WARN_ON(!unmapped))
                        break;
 
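
The loop added above replaces the old page-at-a-time iommu_unmap() with one
call per physically contiguous run: starting from the PAGE_SIZE mapping at
iova, it extends len while iommu_iova_to_phys() keeps returning phys + len,
then unmaps the whole run at once. Fewer iommu_unmap() calls means fewer
potential hardware cache flushes, per the comment in the hunk. A userspace
model of the same chunking, with the IOMMU translation stubbed out as an
array (illustrative only):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
        /* Toy iova->phys table: pages 0-2 are physically contiguous. */
        unsigned long phys[] = { 0x10000, 0x11000, 0x12000, 0x40000 };
        unsigned long npages = 4, i = 0;

        while (i < npages) {
            unsigned long len = PAGE_SIZE;

            /* Extend the run while the next page follows physically,
             * mirroring the iommu_iova_to_phys() walk above. */
            while (i + len / PAGE_SIZE < npages &&
                   phys[i + len / PAGE_SIZE] == phys[i] + len)
                len += PAGE_SIZE;

            printf("unmap iova %#lx len %#lx\n", i * PAGE_SIZE, len);
            i += len / PAGE_SIZE;
        }
        return 0;
    }

This prints two unmap calls instead of four: one for the three-page run and
one for the lone trailing page.
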
@@ -367,6 +386,8 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
                                             unmapped >> PAGE_SHIFT,
                                             dma->prot, false);
                iova += unmapped;
+
+               cond_resched();
        }
 
        vfio_lock_acct(-unlocked);
@@ -511,6 +532,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
                            map_try_harder(d, iova, pfn, npage, prot))
                                goto unwind;
                }
+
+               cond_resched();
        }
 
        return 0;
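
The cond_resched() calls added in the hunks above (two in vfio_unmap_unpin()
and one in vfio_iommu_map()) address latency rather than throughput:
unmapping or replaying a large DMA range across several IOMMU domains can
keep a CPU busy long enough to starve other tasks, so each iteration offers
the scheduler a preemption point. The pattern in isolation, as a sketch only
(do_unit() is a hypothetical stand-in for one iteration's work, not a
function in this file):

    #include <linux/sched.h>

    static void long_loop(unsigned long count)
    {
        unsigned long i;

        for (i = 0; i < count; i++) {
            do_unit(i);          /* one unit of map/unmap work */
            cond_resched();      /* offer the scheduler a preemption point */
        }
    }
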
@@ -665,6 +688,39 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
        return 0;
 }
 
+/*
+ * We change our unmap behavior slightly depending on whether the IOMMU
+ * supports fine-grained superpages.  IOMMUs like AMD-Vi will use a superpage
+ * for practically any contiguous power-of-two mapping we give it.  This means
+ * we don't need to look for contiguous chunks ourselves to make unmapping
+ * more efficient.  On IOMMUs with coarse-grained super pages, like Intel VT-d
+ * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
+ * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
+ * hugetlbfs is in use.
+ */
+static void vfio_test_domain_fgsp(struct vfio_domain *domain)
+{
+       struct page *pages;
+       int ret, order = get_order(PAGE_SIZE * 2);
+
+       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+       if (!pages)
+               return;
+
+       ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
+                       IOMMU_READ | IOMMU_WRITE | domain->prot);
+       if (!ret) {
+               size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
+
+               if (unmapped == PAGE_SIZE)
+                       iommu_unmap(domain->domain, PAGE_SIZE, PAGE_SIZE);
+               else
+                       domain->fgsp = true;
+       }
+
+       __free_pages(pages, order);
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
                                         struct iommu_group *iommu_group)
 {
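
vfio_test_domain_fgsp() above deserves a note on its inverted-looking check.
It maps two physically contiguous zeroed pages at IOVA 0, then asks the
driver to unmap only the first PAGE_SIZE. A driver that promoted the
two-page mapping to a superpage cannot split it, so the unmap returns the
full 2 * PAGE_SIZE; that oversized return is exactly the fgsp signal. A
driver that returns precisely PAGE_SIZE splits fine, and the second page is
unmapped explicitly to clean up. A toy userspace model of the decision
(nothing here is the real iommu API):

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Hypothetical unmap: a driver that promoted the range to one
     * superpage cannot split it and unmaps the whole mapping. */
    static unsigned long toy_unmap(bool superpage_driver,
                                   unsigned long mapped,
                                   unsigned long requested)
    {
        return superpage_driver ? mapped : requested;
    }

    int main(void)
    {
        bool drivers[] = { true /* AMD-Vi-like */, false /* VT-d-like */ };

        for (int i = 0; i < 2; i++) {
            unsigned long unmapped =
                toy_unmap(drivers[i], 2 * PAGE_SIZE, PAGE_SIZE);

            printf("driver %d: unmapped %#lx -> fgsp=%d\n",
                   i, unmapped, unmapped != PAGE_SIZE);
        }
        return 0;
    }

With fgsp set, the chunk-finding loop in vfio_unmap_unpin() short-circuits
and the driver's own superpage handling does the batching instead.
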
@@ -758,6 +814,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                }
        }
 
+       vfio_test_domain_fgsp(domain);
+
        /* replay mappings on new domains */
        ret = vfio_iommu_replay(iommu, domain);
        if (ret)