Merge tag 'drm-intel-next-fixes-2015-04-25' of git://anongit.freedesktop.org/drm...
[cascardo/linux.git] drivers/gpu/drm/i915/intel_lrc.c
index e8d3da9..09df74b 100644
@@ -254,8 +254,10 @@ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
        return lrca >> 12;
 }
 
-static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
+static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring,
+                                        struct drm_i915_gem_object *ctx_obj)
 {
+       struct drm_device *dev = ring->dev;
        uint64_t desc;
        uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
 
@@ -272,6 +274,13 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
         * signalling between Command Streamers */
        /* desc |= GEN8_CTX_FORCE_RESTORE; */
 
+       /* WaEnableForceRestoreInCtxtDescForVCS:skl */
+       if (IS_GEN9(dev) &&
+           INTEL_REVID(dev) <= SKL_REVID_B0 &&
+           (ring->id == BCS || ring->id == VCS ||
+           ring->id == VECS || ring->id == VCS2))
+               desc |= GEN8_CTX_FORCE_RESTORE;
+
        return desc;
 }
 
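A minimal standalone sketch of the revision/engine gate added above, assuming stand-in types and a placeholder value for the SKL_REVID_B0 cutoff (the helper name and struct are illustrative, not part of the driver):

#include <stdbool.h>

enum engine_id { RCS_ID, BCS_ID, VCS_ID, VECS_ID, VCS2_ID };

struct gpu_info {
        int gen;        /* graphics generation, 9 for Skylake */
        int revid;      /* stepping, compared against a B0 cutoff */
};

#define REVID_B0_CUTOFF 1       /* placeholder for SKL_REVID_B0 */

/*
 * WaEnableForceRestoreInCtxtDescForVCS sketch: force a context restore
 * on early Gen9 steppings for every engine except the render ring.
 */
static bool needs_force_restore(const struct gpu_info *info, enum engine_id id)
{
        return info->gen == 9 && info->revid <= REVID_B0_CUTOFF &&
               (id == BCS_ID || id == VCS_ID ||
                id == VECS_ID || id == VCS2_ID);
}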
@@ -286,13 +295,13 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
 
        /* XXX: You must always write both descriptors in the order below. */
        if (ctx_obj1)
-               temp = execlists_ctx_descriptor(ctx_obj1);
+               temp = execlists_ctx_descriptor(ring, ctx_obj1);
        else
                temp = 0;
        desc[1] = (u32)(temp >> 32);
        desc[0] = (u32)temp;
 
-       temp = execlists_ctx_descriptor(ctx_obj0);
+       temp = execlists_ctx_descriptor(ring, ctx_obj0);
        desc[3] = (u32)(temp >> 32);
        desc[2] = (u32)temp;
 
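For reference, the four values written to the ELSP are just the two 64-bit descriptors split into 32-bit halves. A self-contained sketch of that split, keeping the ordering from the code above (element 1 occupies words 0/1 as low/high, element 0 occupies words 2/3; the function name is illustrative):

#include <stdint.h>

/*
 * Pack two 64-bit execlist context descriptors into the four 32-bit
 * words submitted to the ELSP: element 1 (possibly zero) goes into
 * words 0/1 as low/high, element 0 into words 2/3 as low/high.
 */
static void pack_elsp_words(uint64_t desc0, uint64_t desc1, uint32_t out[4])
{
        out[1] = (uint32_t)(desc1 >> 32);       /* element 1, upper dword */
        out[0] = (uint32_t)desc1;               /* element 1, lower dword */
        out[3] = (uint32_t)(desc0 >> 32);       /* element 0, upper dword */
        out[2] = (uint32_t)desc0;               /* element 0, lower dword */
}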
@@ -384,6 +393,26 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
                }
        }
 
+       if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
+               /*
+                * WaIdleLiteRestore: make sure we never cause a lite
+                * restore with HEAD==TAIL
+                */
+               if (req0 && req0->elsp_submitted) {
+                       /*
+                        * Apply the workaround NOOPs to prevent ring:HEAD == req:TAIL
+                        * as we resubmit the request. See gen8_emit_request()
+                        * for where we prepare the padding after the end of the
+                        * request.
+                        */
+                       struct intel_ringbuffer *ringbuf;
+
+                       ringbuf = req0->ctx->engine[ring->id].ringbuf;
+                       req0->tail += 8;
+                       req0->tail &= ringbuf->size - 1;
+               }
+       }
+
        WARN_ON(req1 && req1->elsp_submitted);
 
        execlists_submit_contexts(ring, req0->ctx, req0->tail,
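The tail adjustment above skips the two 4-byte MI_NOOPs that gen8_emit_request() pads after every request, with the AND acting as a cheap modulo because the ring size is a power of two. A standalone sketch of that arithmetic (names and the assert are illustrative):

#include <assert.h>
#include <stdint.h>

#define WA_PADDING_BYTES 8u     /* two 4-byte MI_NOOPs, see gen8_emit_request() */

/*
 * Advance a request's saved tail past the workaround padding so a
 * resubmission (lite restore) never ends up with HEAD == TAIL.
 * ring_size must be a power of two for the mask to act as a modulo.
 */
static uint32_t skip_wa_padding(uint32_t tail, uint32_t ring_size)
{
        assert((ring_size & (ring_size - 1)) == 0);
        return (tail + WA_PADDING_BYTES) & (ring_size - 1);
}

/* Example: skip_wa_padding(4092, 4096) == 4, i.e. the tail wraps. */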
@@ -612,7 +641,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
  * @vmas: list of vmas.
  * @batch_obj: the batchbuffer to submit.
  * @exec_start: batchbuffer start virtual address pointer.
- * @flags: translated execbuffer call flags.
+ * @dispatch_flags: translated execbuffer call flags.
  *
  * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
  * away the submission details of the execbuffer ioctl call.
@@ -625,7 +654,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
                               struct drm_i915_gem_execbuffer2 *args,
                               struct list_head *vmas,
                               struct drm_i915_gem_object *batch_obj,
-                              u64 exec_start, u32 flags)
+                              u64 exec_start, u32 dispatch_flags)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
@@ -698,10 +727,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
                dev_priv->relative_constants_mode = instp_mode;
        }
 
-       ret = ring->emit_bb_start(ringbuf, ctx, exec_start, flags);
+       ret = ring->emit_bb_start(ringbuf, ctx, exec_start, dispatch_flags);
        if (ret)
                return ret;
 
+       trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
+
        i915_gem_execbuffer_move_to_active(vmas, ring);
        i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
@@ -776,7 +807,7 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf,
        return 0;
 }
 
-/**
+/*
  * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
  * @ringbuf: Logical Ringbuffer to advance.
  *
@@ -785,9 +816,10 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf,
  * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
  * point, the tail *inside* the context is updated and the ELSP written to.
  */
-void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
-                                          struct intel_context *ctx,
-                                          struct drm_i915_gem_request *request)
+static void
+intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf,
+                                     struct intel_context *ctx,
+                                     struct drm_i915_gem_request *request)
 {
        struct intel_engine_cs *ring = ringbuf->ring;
 
@@ -876,12 +908,9 @@ static int logical_ring_alloc_request(struct intel_engine_cs *ring,
                return ret;
        }
 
-       /* Hold a reference to the context this request belongs to
-        * (we will need it when the time comes to emit/retire the
-        * request).
-        */
        request->ctx = ctx;
        i915_gem_context_reference(request->ctx);
+       request->ringbuf = ctx->engine[ring->id].ringbuf;
 
        ring->outstanding_lazy_request = request;
        return 0;
@@ -1140,11 +1169,22 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring)
        return init_workarounds_ring(ring);
 }
 
+static int gen9_init_render_ring(struct intel_engine_cs *ring)
+{
+       int ret;
+
+       ret = gen8_init_common_ring(ring);
+       if (ret)
+               return ret;
+
+       return init_workarounds_ring(ring);
+}
+
 static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
                              struct intel_context *ctx,
-                             u64 offset, unsigned flags)
+                             u64 offset, unsigned dispatch_flags)
 {
-       bool ppgtt = !(flags & I915_DISPATCH_SECURE);
+       bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
        int ret;
 
        ret = intel_logical_ring_begin(ringbuf, ctx, 4);
@@ -1295,7 +1335,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
        u32 cmd;
        int ret;
 
-       ret = intel_logical_ring_begin(ringbuf, request->ctx, 6);
+       /*
+        * Reserve space for 2 NOOPs at the end of each request to be
+        * used as a workaround for not being allowed to do lite
+        * restore with HEAD==TAIL (WaIdleLiteRestore).
+        */
+       ret = intel_logical_ring_begin(ringbuf, request->ctx, 8);
        if (ret)
                return ret;
 
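A small accounting sketch of why the reservation grows from 6 to 8 dwords: the request body keeps the 6 dwords it reserved before this patch and gains the 2 MI_NOOPs of WaIdleLiteRestore padding emitted in the next hunk (the constant names here are illustrative):

enum {
        REQUEST_BODY_DWORDS = 6,        /* seqno store, user interrupt, NOOP */
        WA_PADDING_DWORDS   = 2,        /* the two padding MI_NOOPs below */
        RESERVED_DWORDS     = REQUEST_BODY_DWORDS + WA_PADDING_DWORDS, /* 8 */
};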
@@ -1313,9 +1358,50 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
        intel_logical_ring_emit(ringbuf, MI_NOOP);
        intel_logical_ring_advance_and_submit(ringbuf, request->ctx, request);
 
+       /*
+        * Here we add two extra NOOPs as padding to avoid
+        * lite restore of a context with HEAD==TAIL.
+        */
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
+       intel_logical_ring_advance(ringbuf);
+
        return 0;
 }
 
+static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
+                                             struct intel_context *ctx)
+{
+       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+       struct render_state so;
+       struct drm_i915_file_private *file_priv = ctx->file_priv;
+       struct drm_file *file = file_priv ? file_priv->file : NULL;
+       int ret;
+
+       ret = i915_gem_render_state_prepare(ring, &so);
+       if (ret)
+               return ret;
+
+       if (so.rodata == NULL)
+               return 0;
+
+       ret = ring->emit_bb_start(ringbuf,
+                       ctx,
+                       so.ggtt_offset,
+                       I915_DISPATCH_SECURE);
+       if (ret)
+               goto out;
+
+       i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+
+       ret = __i915_add_request(ring, file, so.obj);
+       /* intel_logical_ring_add_request moves object to inactive if it
+        * fails */
+out:
+       i915_gem_render_state_fini(&so);
+       return ret;
+}
+
 static int gen8_init_rcs_context(struct intel_engine_cs *ring,
                       struct intel_context *ctx)
 {
@@ -1399,7 +1485,10 @@ static int logical_render_ring_init(struct drm_device *dev)
        if (HAS_L3_DPF(dev))
                ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
-       ring->init_hw = gen8_init_render_ring;
+       if (INTEL_INFO(dev)->gen >= 9)
+               ring->init_hw = gen9_init_render_ring;
+       else
+               ring->init_hw = gen8_init_render_ring;
        ring->init_context = gen8_init_rcs_context;
        ring->cleanup = intel_fini_pipe_control;
        ring->get_seqno = gen8_get_seqno;
@@ -1581,37 +1670,47 @@ cleanup_render_ring:
        return ret;
 }
 
-int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
-                                      struct intel_context *ctx)
+static u32
+make_rpcs(struct drm_device *dev)
 {
-       struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
-       struct render_state so;
-       struct drm_i915_file_private *file_priv = ctx->file_priv;
-       struct drm_file *file = file_priv ? file_priv->file : NULL;
-       int ret;
+       u32 rpcs = 0;
 
-       ret = i915_gem_render_state_prepare(ring, &so);
-       if (ret)
-               return ret;
-
-       if (so.rodata == NULL)
+       /*
+        * No explicit RPCS request is needed to ensure full
+        * slice/subslice/EU enablement prior to Gen9.
+        */
+       if (INTEL_INFO(dev)->gen < 9)
                return 0;
 
-       ret = ring->emit_bb_start(ringbuf,
-                       ctx,
-                       so.ggtt_offset,
-                       I915_DISPATCH_SECURE);
-       if (ret)
-               goto out;
+       /*
+        * Starting in Gen9, render power gating can leave
+        * slice/subslice/EU in a partially enabled state. We
+        * must make an explicit request through RPCS for full
+        * enablement.
+        */
+       if (INTEL_INFO(dev)->has_slice_pg) {
+               rpcs |= GEN8_RPCS_S_CNT_ENABLE;
+               rpcs |= INTEL_INFO(dev)->slice_total <<
+                       GEN8_RPCS_S_CNT_SHIFT;
+               rpcs |= GEN8_RPCS_ENABLE;
+       }
 
-       i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+       if (INTEL_INFO(dev)->has_subslice_pg) {
+               rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
+               rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
+                       GEN8_RPCS_SS_CNT_SHIFT;
+               rpcs |= GEN8_RPCS_ENABLE;
+       }
 
-       ret = __i915_add_request(ring, file, so.obj);
-       /* intel_logical_ring_add_request moves object to inactive if it
-        * fails */
-out:
-       i915_gem_render_state_fini(&so);
-       return ret;
+       if (INTEL_INFO(dev)->has_eu_pg) {
+               rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
+                       GEN8_RPCS_EU_MIN_SHIFT;
+               rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
+                       GEN8_RPCS_EU_MAX_SHIFT;
+               rpcs |= GEN8_RPCS_ENABLE;
+       }
+
+       return rpcs;
 }
 
 static int
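make_rpcs() above packs the slice, subslice and per-subslice EU counts into the single RPCS dword that CTX_R_PWR_CLK_STATE loads. A self-contained sketch of the same packing pattern, with the Gen9 gate elided and placeholder shifts/bits standing in for the GEN8_RPCS_* definitions in i915_reg.h:

#include <stdbool.h>
#include <stdint.h>

/* Placeholder field layout; the real shifts/bits live in i915_reg.h. */
#define RPCS_ENABLE        (1u << 31)
#define RPCS_S_CNT_ENABLE  (1u << 18)
#define RPCS_S_CNT_SHIFT   15
#define RPCS_SS_CNT_ENABLE (1u << 11)
#define RPCS_SS_CNT_SHIFT  8
#define RPCS_EU_MIN_SHIFT  0
#define RPCS_EU_MAX_SHIFT  4

struct sseu_info {
        bool has_slice_pg, has_subslice_pg, has_eu_pg;
        uint32_t slice_total, subslice_per_slice, eu_per_subslice;
};

/*
 * Request full slice/subslice/EU enablement whenever the corresponding
 * power-gating feature exists, mirroring the structure of make_rpcs().
 */
static uint32_t pack_rpcs(const struct sseu_info *s)
{
        uint32_t rpcs = 0;

        if (s->has_slice_pg)
                rpcs |= RPCS_ENABLE | RPCS_S_CNT_ENABLE |
                        (s->slice_total << RPCS_S_CNT_SHIFT);
        if (s->has_subslice_pg)
                rpcs |= RPCS_ENABLE | RPCS_SS_CNT_ENABLE |
                        (s->subslice_per_slice << RPCS_SS_CNT_SHIFT);
        if (s->has_eu_pg)
                rpcs |= RPCS_ENABLE |
                        (s->eu_per_subslice << RPCS_EU_MIN_SHIFT) |
                        (s->eu_per_subslice << RPCS_EU_MAX_SHIFT);
        return rpcs;
}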
@@ -1659,7 +1758,8 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
        reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
        reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
        reg_state[CTX_CONTEXT_CONTROL+1] =
-                       _MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT);
+               _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+                               CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
        reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
        reg_state[CTX_RING_HEAD+1] = 0;
        reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
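The CTX_CONTEXT_CONTROL value above uses the masked-write convention: the upper 16 bits of the written dword tell the hardware which bits it may change, the lower 16 bits carry the new values, so unrelated register bits are untouched. A minimal sketch of helpers with that behaviour (the names are illustrative; i915's _MASKED_BIT_ENABLE follows the same shift-and-or pattern):

#include <stdint.h>

/* Set bits: mask in the upper halfword, the same bits in the lower. */
static inline uint32_t masked_bit_enable(uint32_t bits)
{
        return (bits << 16) | bits;
}

/* Clear bits: mask in the upper halfword, zero in the lower. */
static inline uint32_t masked_bit_disable(uint32_t bits)
{
        return bits << 16;
}

/*
 * Example: enabling two control flags at bit positions 0 and 3 yields
 * 0x00090009 -- both flags are set while every other register bit is
 * left as-is by the hardware.
 */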
@@ -1706,18 +1806,18 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
        reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
        reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
        reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-       reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
-       reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
-       reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
-       reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
-       reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
-       reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
-       reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
-       reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
+       reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr);
+       reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr);
+       reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr);
+       reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr);
+       reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr);
+       reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr);
+       reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr);
+       reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr);
        if (ring->id == RCS) {
                reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
-               reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
-               reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
+               reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
+               reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
        }
 
        kunmap_atomic(reg_state);
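Each page-directory pointer written above is a 64-bit DMA address stored as an upper/lower dword pair in the context image. A standalone sketch of that split (the function is illustrative; upper_32_bits()/lower_32_bits() in the kernel reduce to the same shifts):

#include <stdint.h>

/*
 * Store one 64-bit page-directory DMA address as the UDW/LDW pair the
 * hardware context image expects.
 */
static void write_pdp_pair(uint32_t *udw, uint32_t *ldw, uint64_t daddr)
{
        *udw = (uint32_t)(daddr >> 32);         /* upper_32_bits(daddr) */
        *ldw = (uint32_t)(daddr & 0xffffffffu); /* lower_32_bits(daddr) */
}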
@@ -1925,3 +2025,38 @@ error_unpin_ctx:
        drm_gem_object_unreference(&ctx_obj->base);
        return ret;
 }
+
+void intel_lr_context_reset(struct drm_device *dev,
+                       struct intel_context *ctx)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_engine_cs *ring;
+       int i;
+
+       for_each_ring(ring, dev_priv, i) {
+               struct drm_i915_gem_object *ctx_obj =
+                               ctx->engine[ring->id].state;
+               struct intel_ringbuffer *ringbuf =
+                               ctx->engine[ring->id].ringbuf;
+               uint32_t *reg_state;
+               struct page *page;
+
+               if (!ctx_obj)
+                       continue;
+
+               if (i915_gem_object_get_pages(ctx_obj)) {
+                       WARN(1, "Failed get_pages for context obj\n");
+                       continue;
+               }
+               page = i915_gem_object_get_page(ctx_obj, 1);
+               reg_state = kmap_atomic(page);
+
+               reg_state[CTX_RING_HEAD+1] = 0;
+               reg_state[CTX_RING_TAIL+1] = 0;
+
+               kunmap_atomic(reg_state);
+
+               ringbuf->head = 0;
+               ringbuf->tail = 0;
+       }
+}
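A hypothetical post-reset check, only to spell out the invariant the function above establishes: after intel_lr_context_reset() both the driver's ringbuffer bookkeeping and the HEAD/TAIL dwords saved in the context image describe an empty ring. The struct and helper below are assumptions for illustration, not driver code:

#include <stdbool.h>
#include <stdint.h>

struct lr_ring_state {
        uint32_t head, tail;            /* ringbuf->head / ringbuf->tail */
        uint32_t img_head, img_tail;    /* CTX_RING_HEAD+1 / CTX_RING_TAIL+1 */
};

/* True when the logical ring looks freshly reset: nothing queued. */
static bool lr_ring_is_reset(const struct lr_ring_state *s)
{
        return s->head == 0 && s->tail == 0 &&
               s->img_head == 0 && s->img_tail == 0;
}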