drm/gem: Warn on illegal use of the dumb buffer interface v2
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc..2b02fcf 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -94,7 +95,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
               struct i915_address_space *vm,
               struct drm_file *file)
 {
-       struct drm_i915_private *dev_priv = vm->dev->dev_private;
        struct drm_i915_gem_object *obj;
        struct list_head objects;
        int i, ret;
@@ -121,6 +121,9 @@ eb_lookup_vmas(struct eb_vmas *eb,
                        goto err;
                }
 
+               WARN_ONCE(obj->base.dumb,
+                         "GPU use of dumb buffer is illegal.\n");
+
                drm_gem_object_reference(&obj->base);
                list_add_tail(&obj->obj_exec_link, &objects);
        }
@@ -129,20 +132,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
        i = 0;
        while (!list_empty(&objects)) {
                struct i915_vma *vma;
-               struct i915_address_space *bind_vm = vm;
-
-               if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
-                   USES_FULL_PPGTT(vm->dev)) {
-                       ret = -EINVAL;
-                       goto err;
-               }
-
-               /* If we have secure dispatch, or the userspace assures us that
-                * they know what they're doing, use the GGTT VM.
-                */
-               if (((args->flags & I915_EXEC_SECURE) &&
-                   (i == (args->buffer_count - 1))))
-                       bind_vm = &dev_priv->gtt.base;
 
                obj = list_first_entry(&objects,
                                       struct drm_i915_gem_object,
@@ -156,7 +145,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
                 * from the (obj, vm) we don't run the risk of creating
                 * duplicated vmas for the same vm.
                 */
-               vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
+               vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
                if (IS_ERR(vma)) {
                        DRM_DEBUG("Failed to lookup VMA\n");
                        ret = PTR_ERR(vma);
@@ -307,7 +296,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint64_t delta = reloc->delta + target_offset;
-       uint32_t __iomem *reloc_entry;
+       uint64_t offset;
        void __iomem *reloc_page;
        int ret;
 
@@ -320,25 +309,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
                return ret;
 
        /* Map the page containing the relocation we're going to perform.  */
-       reloc->offset += i915_gem_obj_ggtt_offset(obj);
+       offset = i915_gem_obj_ggtt_offset(obj);
+       offset += reloc->offset;
        reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-                       reloc->offset & PAGE_MASK);
-       reloc_entry = (uint32_t __iomem *)
-               (reloc_page + offset_in_page(reloc->offset));
-       iowrite32(lower_32_bits(delta), reloc_entry);
+                                             offset & PAGE_MASK);
+       iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
        if (INTEL_INFO(dev)->gen >= 8) {
-               reloc_entry += 1;
+               offset += sizeof(uint32_t);
 
-               if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
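+               /* The upper dword may start on the following page, in which
+                * case the atomic mapping must be redone at that page. */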
+               if (offset_in_page(offset) == 0) {
                        io_mapping_unmap_atomic(reloc_page);
-                       reloc_page = io_mapping_map_atomic_wc(
-                                       dev_priv->gtt.mappable,
-                                       reloc->offset + sizeof(uint32_t));
-                       reloc_entry = reloc_page;
+                       reloc_page =
+                               io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
+                                                        offset);
                }
 
-               iowrite32(upper_32_bits(delta), reloc_entry);
+               iowrite32(upper_32_bits(delta),
+                         reloc_page + offset_in_page(offset));
        }
 
        io_mapping_unmap_atomic(reloc_page);
@@ -372,12 +360,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
         * through the ppgtt for non_secure batchbuffers. */
        if (unlikely(IS_GEN6(dev) &&
            reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
-           !target_i915_obj->has_global_gtt_mapping)) {
-               struct i915_vma *vma =
-                       list_first_entry(&target_i915_obj->vma_list,
-                                        typeof(*vma), vma_link);
-               vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
-       }
+           !(target_vma->bound & GLOBAL_BIND)))
+               target_vma->bind_vma(target_vma, target_i915_obj->cache_level,
+                               GLOBAL_BIND);
 
        /* Validate that the target is in a valid r/w GPU domain */
        if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
@@ -534,14 +519,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
        return ret;
 }
 
-static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-               i915_is_ggtt(vma->vm);
-}
-
 static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
                                struct intel_engine_cs *ring,
@@ -549,20 +526,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 {
        struct drm_i915_gem_object *obj = vma->obj;
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-       bool need_fence;
        uint64_t flags;
        int ret;
 
        flags = 0;
-
-       need_fence =
-               has_fenced_gpu_access &&
-               entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-               obj->tiling_mode != I915_TILING_NONE;
-       if (need_fence || need_reloc_mappable(vma))
-               flags |= PIN_MAPPABLE;
-
+       if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+               flags |= PIN_GLOBAL | PIN_MAPPABLE;
        if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
                flags |= PIN_GLOBAL;
        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -574,17 +543,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 
        entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
-       if (has_fenced_gpu_access) {
-               if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-                       ret = i915_gem_object_get_fence(obj);
-                       if (ret)
-                               return ret;
-
-                       if (i915_gem_object_pin_fence(obj))
-                               entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+       if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+               ret = i915_gem_object_get_fence(obj);
+               if (ret)
+                       return ret;
 
-                       obj->pending_fenced_gpu_access = true;
-               }
+               if (i915_gem_object_pin_fence(obj))
+                       entry->flags |= __EXEC_OBJECT_HAS_FENCE;
        }
 
        if (entry->offset != vma->node.start) {
@@ -601,26 +566,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 }
 
 static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
 {
        struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-       struct drm_i915_gem_object *obj = vma->obj;
-       bool need_fence, need_mappable;
 
-       need_fence =
-               has_fenced_gpu_access &&
-               entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-               obj->tiling_mode != I915_TILING_NONE;
-       need_mappable = need_fence || need_reloc_mappable(vma);
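+       /* Only relocations written through the GTT aperture require the
+        * object to be bound into the CPU-visible mappable region. */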
+       if (entry->relocation_count == 0)
+               return false;
+
+       if (!i915_is_ggtt(vma->vm))
+               return false;
+
+       /* See also use_cpu_reloc() */
+       if (HAS_LLC(vma->obj->base.dev))
+               return false;
+
+       if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+               return false;
+
+       return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+       struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+       struct drm_i915_gem_object *obj = vma->obj;
 
-       WARN_ON((need_mappable || need_fence) &&
+       WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
               !i915_is_ggtt(vma->vm));
 
        if (entry->alignment &&
            vma->node.start & (entry->alignment - 1))
                return true;
 
-       if (need_mappable && !obj->map_and_fenceable)
+       if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
                return true;
 
        if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -642,9 +621,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
        bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
        int retry;
 
-       if (list_empty(vmas))
-               return 0;
-
        i915_gem_retire_requests_ring(ring);
 
        vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@@ -658,20 +634,21 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
                obj = vma->obj;
                entry = vma->exec_entry;
 
+               if (!has_fenced_gpu_access)
+                       entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
                need_fence =
-                       has_fenced_gpu_access &&
                        entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
                        obj->tiling_mode != I915_TILING_NONE;
                need_mappable = need_fence || need_reloc_mappable(vma);
 
-               if (need_mappable)
+               if (need_mappable) {
+                       entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
                        list_move(&vma->exec_list, &ordered_vmas);
-               else
+               } else
                        list_move_tail(&vma->exec_list, &ordered_vmas);
 
                obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
                obj->base.pending_write_domain = 0;
-               obj->pending_fenced_gpu_access = false;
        }
        list_splice(&ordered_vmas, vmas);
 
@@ -696,7 +673,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
                        if (!drm_mm_node_allocated(&vma->node))
                                continue;
 
-                       if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+                       if (eb_vma_misplaced(vma))
                                ret = i915_vma_unbind(vma);
                        else
                                ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -744,9 +721,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
        int i, total, ret;
        unsigned count = args->buffer_count;
 
-       if (WARN_ON(list_empty(&eb->vmas)))
-               return 0;
-
        vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
 
        /* We may process another execbuffer during the unlock... */
@@ -890,18 +864,24 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 }
 
 static int
-validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
+validate_exec_list(struct drm_device *dev,
+                  struct drm_i915_gem_exec_object2 *exec,
                   int count)
 {
-       int i;
        unsigned relocs_total = 0;
        unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
+       unsigned invalid_flags;
+       int i;
+
+       invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
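+       /* EXEC_OBJECT_NEEDS_GTT cannot be used together with full ppgtt, so
+        * treat it like any other unknown flag and reject the execbuffer. */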
+       if (USES_FULL_PPGTT(dev))
+               invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
        for (i = 0; i < count; i++) {
                char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
                int length; /* limited by fault_in_pages_readable() */
 
-               if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
+               if (exec[i].flags & invalid_flags)
                        return -EINVAL;
 
                /* First check for malicious input causing overflow in
@@ -951,16 +931,26 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
                return ERR_PTR(-EIO);
        }
 
+       if (i915.enable_execlists && !ctx->engine[ring->id].state) {
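+               /* With execlists, the logical ring context for each engine is
+                * created on first use; make sure it exists before submitting. */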
+               int ret = intel_lr_context_deferred_create(ctx, ring);
+               if (ret) {
+                       DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
+                       return ERR_PTR(ret);
+               }
+       }
+
        return ctx;
 }
 
-static void
+void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
                                   struct intel_engine_cs *ring)
 {
+       u32 seqno = intel_ring_get_seqno(ring);
        struct i915_vma *vma;
 
        list_for_each_entry(vma, vmas, exec_list) {
+               struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
                struct drm_i915_gem_object *obj = vma->obj;
                u32 old_read = obj->base.read_domains;
                u32 old_write = obj->base.write_domain;
@@ -969,24 +959,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
                if (obj->base.write_domain == 0)
                        obj->base.pending_read_domains |= obj->base.read_domains;
                obj->base.read_domains = obj->base.pending_read_domains;
-               obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
                i915_vma_move_to_active(vma, ring);
                if (obj->base.write_domain) {
                        obj->dirty = 1;
-                       obj->last_write_seqno = intel_ring_get_seqno(ring);
+                       obj->last_write_seqno = seqno;
 
                        intel_fb_obj_invalidate(obj, ring);
 
                        /* update for the implicit flush after a batch */
                        obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
                }
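+               /* Record the last fenced GPU access on the object and mark any
+                * pinned fence register as most recently used. */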
+               if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+                       obj->last_fenced_seqno = seqno;
+                       if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+                               struct drm_i915_private *dev_priv = to_i915(ring->dev);
+                               list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+                                              &dev_priv->mm.fence_list);
+                       }
+               }
 
                trace_i915_gem_object_change_domain(obj, old_read, old_write);
        }
 }
 
-static void
+void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
                                    struct drm_file *file,
                                    struct intel_engine_cs *ring,
@@ -1026,14 +1023,14 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
        return 0;
 }
 
-static int
-legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
-                            struct intel_engine_cs *ring,
-                            struct intel_context *ctx,
-                            struct drm_i915_gem_execbuffer2 *args,
-                            struct list_head *vmas,
-                            struct drm_i915_gem_object *batch_obj,
-                            u64 exec_start, u32 flags)
+int
+i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
+                              struct intel_engine_cs *ring,
+                              struct intel_context *ctx,
+                              struct drm_i915_gem_execbuffer2 *args,
+                              struct list_head *vmas,
+                              struct drm_i915_gem_object *batch_obj,
+                              u64 exec_start, u32 flags)
 {
        struct drm_clip_rect *cliprects = NULL;
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1254,7 +1251,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
        if (!i915_gem_check_execbuffer(args))
                return -EINVAL;
 
-       ret = validate_exec_list(exec, args->buffer_count);
+       ret = validate_exec_list(dev, exec, args->buffer_count);
        if (ret)
                return ret;
 
@@ -1318,8 +1315,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
        i915_gem_context_reference(ctx);
 
-       vm = ctx->vm;
-       if (!USES_FULL_PPGTT(dev))
+       if (ctx->ppgtt)
+               vm = &ctx->ppgtt->base;
+       else
                vm = &dev_priv->gtt.base;
 
        eb = eb_create(args);
@@ -1370,41 +1368,54 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                                      batch_obj,
                                      args->batch_start_offset,
                                      file->is_master);
-               if (ret)
-                       goto err;
-
-               /*
-                * XXX: Actually do this when enabling batch copy...
-                *
-                * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
-                * from MI_BATCH_BUFFER_START commands issued in the
-                * dispatch_execbuffer implementations. We specifically don't
-                * want that set when the command parser is enabled.
-                */
+               if (ret) {
+                       if (ret != -EACCES)
+                               goto err;
+               } else {
+                       /*
+                        * XXX: Actually do this when enabling batch copy...
+                        *
+                        * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
+                        * from MI_BATCH_BUFFER_START commands issued in the
+                        * dispatch_execbuffer implementations. We specifically don't
+                        * want that set when the command parser is enabled.
+                        */
+               }
        }
 
        /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
         * batch" bit. Hence we need to pin secure batches into the global gtt.
         * hsw should have this fixed, but bdw mucks it up again. */
-       if (flags & I915_DISPATCH_SECURE &&
-           !batch_obj->has_global_gtt_mapping) {
-               /* When we have multiple VMs, we'll need to make sure that we
-                * allocate space first */
-               struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-               BUG_ON(!vma);
-               vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-       }
+       if (flags & I915_DISPATCH_SECURE) {
+               /*
+                * So on first glance it looks freaky that we pin the batch here
+                * outside of the reservation loop. But:
+                * - The batch is already pinned into the relevant ppgtt, so we
+                *   already have the backing storage fully allocated.
+                * - No other BO uses the global gtt (well contexts, but meh),
+                *   so we don't really have issues with multiple objects not
+                *   fitting due to fragmentation.
+                * So this is actually safe.
+                */
+               ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+               if (ret)
+                       goto err;
 
-       if (flags & I915_DISPATCH_SECURE)
                exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-       else
+       } else
                exec_start += i915_gem_obj_offset(batch_obj, vm);
 
-       ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
-                       args, &eb->vmas, batch_obj, exec_start, flags);
-       if (ret)
-               goto err;
+       ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
+                                     &eb->vmas, batch_obj, exec_start, flags);
 
+       /*
+        * FIXME: We crucially rely upon the active tracking for the (ppgtt)
+        * batch vma for correctness. For less ugly and less fragility this
+        * needs to be adjusted to also track the ggtt batch vma properly as
+        * active.
+        */
+       if (flags & I915_DISPATCH_SECURE)
+               i915_gem_object_ggtt_unpin(batch_obj);
 err:
        /* the request owns the ref now */
        i915_gem_context_unreference(ctx);