ceph: fix d_obtain_alias() misuses

[cascardo/linux.git] / drivers / iommu / dma-iommu.c
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c

index 58f2fe6..ea5a9eb 100644 (file)
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -94,7 +94,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size
                 return -ENODEV;
  
         /* Use the smallest supported page size for IOVA granularity */
-       order = __ffs(domain->ops->pgsize_bitmap);
+       order = __ffs(domain->pgsize_bitmap);
         base_pfn = max_t(unsigned long, 1, base >> order);
         end_pfn = (base + size - 1) >> order;
  
@@ -190,11 +190,15 @@ static void __iommu_dma_free_pages(struct page **pages, int count)
         kvfree(pages);
  }
  
-static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+static struct page **__iommu_dma_alloc_pages(unsigned int count,
+               unsigned long order_mask, gfp_t gfp)
  {
         struct page **pages;
         unsigned int i = 0, array_size = count * sizeof(*pages);
-       unsigned int order = MAX_ORDER;
+
+       order_mask &= (2U << MAX_ORDER) - 1;
+       if (!order_mask)
+               return NULL;
  
         if (array_size <= PAGE_SIZE)
                 pages = kzalloc(array_size, GFP_KERNEL);
@@ -208,36 +212,38 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
  
         while (count) {
                 struct page *page = NULL;
-               int j;
+               unsigned int order_size;
  
                 /*
                  * Higher-order allocations are a convenience rather
                  * than a necessity, hence using __GFP_NORETRY until
-                * falling back to single-page allocations.
+                * falling back to minimum-order allocations.
                  */
-               for (order = min_t(unsigned int, order, __fls(count));
-                    order > 0; order--) {
-                       page = alloc_pages(gfp | __GFP_NORETRY, order);
+               for (order_mask &= (2U << __fls(count)) - 1;
+                    order_mask; order_mask &= ~order_size) {
+                       unsigned int order = __fls(order_mask);
+
+                       order_size = 1U << order;
+                       page = alloc_pages((order_mask - order_size) ?
+                                          gfp | __GFP_NORETRY : gfp, order);
                         if (!page)
                                 continue;
-                       if (PageCompound(page)) {
-                               if (!split_huge_page(page))
-                                       break;
-                               __free_pages(page, order);
-                       } else {
+                       if (!order)
+                               break;
+                       if (!PageCompound(page)) {
                                 split_page(page, order);
                                 break;
+                       } else if (!split_huge_page(page)) {
+                               break;
                         }
+                       __free_pages(page, order);
                 }
-               if (!page)
-                       page = alloc_page(gfp);
                 if (!page) {
                         __iommu_dma_free_pages(pages, i);
                         return NULL;
                 }
-               j = 1 << order;
-               count -= j;
-               while (j--)
+               count -= order_size;
+               while (order_size--)
                         pages[i++] = page++;
         }
         return pages;
@@ -267,6 +273,7 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
   *      attached to an iommu_dma_domain
   * @size: Size of buffer in bytes
   * @gfp: Allocation flags
+ * @attrs: DMA attributes for this allocation
   * @prot: IOMMU mapping flags
   * @handle: Out argument for allocated DMA handle
   * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
@@ -278,8 +285,8 @@ void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
   * Return: Array of struct page pointers describing the buffer,
   *        or NULL on failure.
   */
-struct page **iommu_dma_alloc(struct device *dev, size_t size,
-               gfp_t gfp, int prot, dma_addr_t *handle,
+struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
+               struct dma_attrs *attrs, int prot, dma_addr_t *handle,
                 void (*flush_page)(struct device *, const void *, phys_addr_t))
  {
         struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@@ -288,11 +295,22 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size,
         struct page **pages;
         struct sg_table sgt;
         dma_addr_t dma_addr;
-       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
  
         *handle = DMA_ERROR_CODE;
  
-       pages = __iommu_dma_alloc_pages(count, gfp);
+       min_size = alloc_sizes & -alloc_sizes;
+       if (min_size < PAGE_SIZE) {
+               min_size = PAGE_SIZE;
+               alloc_sizes |= PAGE_SIZE;
+       } else {
+               size = ALIGN(size, min_size);
+       }
+       if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+               alloc_sizes = min_size;
+
+       count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       pages = __iommu_dma_alloc_pages(count, alloc_sizes >> PAGE_SHIFT, gfp);
         if (!pages)
                 return NULL;
  
@@ -389,26 +407,58 @@ void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
  
  /*
   * Prepare a successfully-mapped scatterlist to give back to the caller.
- * Handling IOVA concatenation can come later, if needed
+ *
+ * At this point the segments are already laid out by iommu_dma_map_sg() to
+ * avoid individually crossing any boundaries, so we merely need to check a
+ * segment's start address to avoid concatenating across one.
   */
  static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
                 dma_addr_t dma_addr)
  {
-       struct scatterlist *s;
-       int i;
+       struct scatterlist *s, *cur = sg;
+       unsigned long seg_mask = dma_get_seg_boundary(dev);
+       unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev);
+       int i, count = 0;
  
         for_each_sg(sg, s, nents, i) {
-               /* Un-swizzling the fields here, hence the naming mismatch */
-               unsigned int s_offset = sg_dma_address(s);
+               /* Restore this segment's original unaligned fields first */
+               unsigned int s_iova_off = sg_dma_address(s);
                 unsigned int s_length = sg_dma_len(s);
-               unsigned int s_dma_len = s->length;
+               unsigned int s_iova_len = s->length;
  
-               s->offset += s_offset;
+               s->offset += s_iova_off;
                 s->length = s_length;
-               sg_dma_address(s) = dma_addr + s_offset;
-               dma_addr += s_dma_len;
+               sg_dma_address(s) = DMA_ERROR_CODE;
+               sg_dma_len(s) = 0;
+
+               /*
+                * Now fill in the real DMA data. If...
+                * - there is a valid output segment to append to
+                * - and this segment starts on an IOVA page boundary
+                * - but doesn't fall at a segment boundary
+                * - and wouldn't make the resulting output segment too long
+                */
+               if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
+                   (cur_len + s_length <= max_len)) {
+                       /* ...then concatenate it with the previous one */
+                       cur_len += s_length;
+               } else {
+                       /* Otherwise start the next output segment */
+                       if (i > 0)
+                               cur = sg_next(cur);
+                       cur_len = s_length;
+                       count++;
+
+                       sg_dma_address(cur) = dma_addr + s_iova_off;
+               }
+
+               sg_dma_len(cur) = cur_len;
+               dma_addr += s_iova_len;
+
+               if (s_length + s_iova_off < s_iova_len)
+                       cur_len = 0;
         }
-       return i;
+       return count;
  }
  
  /*
@@ -446,34 +496,40 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
         struct scatterlist *s, *prev = NULL;
         dma_addr_t dma_addr;
         size_t iova_len = 0;
+       unsigned long mask = dma_get_seg_boundary(dev);
         int i;
  
         /*
          * Work out how much IOVA space we need, and align the segments to
          * IOVA granules for the IOMMU driver to handle. With some clever
          * trickery we can modify the list in-place, but reversibly, by
-        * hiding the original data in the as-yet-unused DMA fields.
+        * stashing the unaligned parts in the as-yet-unused DMA fields.
          */
         for_each_sg(sg, s, nents, i) {
-               size_t s_offset = iova_offset(iovad, s->offset);
+               size_t s_iova_off = iova_offset(iovad, s->offset);
                 size_t s_length = s->length;
+               size_t pad_len = (mask - iova_len + 1) & mask;
  
-               sg_dma_address(s) = s_offset;
+               sg_dma_address(s) = s_iova_off;
                 sg_dma_len(s) = s_length;
-               s->offset -= s_offset;
-               s_length = iova_align(iovad, s_length + s_offset);
+               s->offset -= s_iova_off;
+               s_length = iova_align(iovad, s_length + s_iova_off);
                 s->length = s_length;
  
                 /*
-                * The simple way to avoid the rare case of a segment
-                * crossing the boundary mask is to pad the previous one
-                * to end at a naturally-aligned IOVA for this one's size,
-                * at the cost of potentially over-allocating a little.
+                * Due to the alignment of our single IOVA allocation, we can
+                * depend on these assumptions about the segment boundary mask:
+                * - If mask size >= IOVA size, then the IOVA range cannot
+                *   possibly fall across a boundary, so we don't care.
+                * - If mask size < IOVA size, then the IOVA range must start
+                *   exactly on a boundary, therefore we can lay things out
+                *   based purely on segment lengths without needing to know
+                *   the actual addresses beforehand.
+                * - mask + 1 must be a power of 2, so pad_len == 0 if
+                *   iova_len == 0, thus we never dereference prev the first
+                *   time through here (i.e. before it has a meaningful value).
                  */
-               if (prev) {
-                       size_t pad_len = roundup_pow_of_two(s_length);
-
-                       pad_len = (pad_len - iova_len) & (pad_len - 1);
+               if (pad_len && pad_len < s_length - 1) {
                         prev->length += pad_len;
                         iova_len += pad_len;
                 }