Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 653ce5e..a9d1094 100644
@@ -335,12 +335,8 @@ out:
  */
 static uint32_t sdma_v3_0_ring_get_rptr(struct amdgpu_ring *ring)
 {
-       u32 rptr;
-
        /* XXX check if swapping is necessary on BE */
-       rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;
-
-       return rptr;
+       return ring->adev->wb.wb[ring->rptr_offs] >> 2;
 }
 
 /**
@@ -499,31 +495,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
        amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
 }
 
-unsigned init_cond_exec(struct amdgpu_ring *ring)
-{
-       unsigned ret;
-       amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
-       amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-       amdgpu_ring_write(ring, 1);
-       ret = ring->wptr;/* this is the offset we need patch later */
-       amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
-       return ret;
-}
-
-void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-       unsigned cur;
-       BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-       cur = ring->wptr - 1;
-       if (likely(cur > offset))
-               ring->ring[offset] = cur - offset;
-       else
-               ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
-
 /**
  * sdma_v3_0_gfx_stop - stop the gfx async dma engines
  *
@@ -976,24 +947,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
                                  uint64_t pe, uint64_t src,
                                  unsigned count)
 {
-       while (count) {
-               unsigned bytes = count * 8;
-               if (bytes > 0x1FFFF8)
-                       bytes = 0x1FFFF8;
-
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
-                       SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-               ib->ptr[ib->length_dw++] = bytes;
-               ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
-               ib->ptr[ib->length_dw++] = lower_32_bits(src);
-               ib->ptr[ib->length_dw++] = upper_32_bits(src);
-               ib->ptr[ib->length_dw++] = lower_32_bits(pe);
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
-               pe += bytes;
-               src += bytes;
-               count -= bytes / 8;
-       }
+       unsigned bytes = count * 8;
+
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       ib->ptr[ib->length_dw++] = bytes;
+       ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+       ib->ptr[ib->length_dw++] = lower_32_bits(src);
+       ib->ptr[ib->length_dw++] = upper_32_bits(src);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
 }
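
With the loop gone, sdma_v3_0_vm_copy_pte emits exactly one COPY_LINEAR packet, so respecting the hardware limit of 0x1FFFF8 bytes per packet (taken from the removed loop above) becomes the caller's job. A minimal sketch of that caller-side split, with a hypothetical helper name; the real splitting lives in the common VM code, reached through the vm_pte_funcs table:

/* Hypothetical caller-side split: one COPY_LINEAR packet may move at
 * most 0x1FFFF8 bytes, i.e. 0x3FFFF eight-byte page table entries. */
static void copy_pte_chunked(struct amdgpu_ib *ib, uint64_t pe,
			     uint64_t src, unsigned count)
{
	const unsigned max_entries = 0x1FFFF8 / 8;

	while (count) {
		unsigned n = min(count, max_entries);

		sdma_v3_0_vm_copy_pte(ib, pe, src, n);
		pe += (uint64_t)n * 8;
		src += (uint64_t)n * 8;
		count -= n;
	}
}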
 
 /**
@@ -1001,39 +964,27 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
  *
  * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
- * @addr: dst addr to write into pe
+ * @value: value to write into the page table entries
  * @count: number of page entries to update
  * @incr: increase next addr by incr bytes
- * @flags: access flags
  *
  * Update PTEs by writing them manually using sDMA (CIK).
  */
-static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
-                                  const dma_addr_t *pages_addr, uint64_t pe,
-                                  uint64_t addr, unsigned count,
-                                  uint32_t incr, uint32_t flags)
-{
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count * 2;
-               if (ndw > 0xFFFFE)
-                       ndw = 0xFFFFE;
-
-               /* for non-physically contiguous pages (system) */
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-                       SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
-               ib->ptr[ib->length_dw++] = pe;
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = ndw;
-               for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-                       value = amdgpu_vm_map_gart(pages_addr, addr);
-                       addr += incr;
-                       value |= flags;
-                       ib->ptr[ib->length_dw++] = value;
-                       ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               }
+static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+                                  uint64_t value, unsigned count,
+                                  uint32_t incr)
+{
+       unsigned ndw = count * 2;
+
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
+               SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = ndw;
+       for (; ndw > 0; ndw -= 2) {
+               ib->ptr[ib->length_dw++] = lower_32_bits(value);
+               ib->ptr[ib->length_dw++] = upper_32_bits(value);
+               value += incr;
        }
 }
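
sdma_v3_0_vm_write_pte no longer resolves system pages itself: the amdgpu_vm_map_gart() lookup and the flag OR-ing move to the caller, which hands in a ready-made 64-bit PTE value that the packet replicates with stride incr. A hedged sketch of what a caller might do for non-contiguous system pages, illustrative only; the real common code batches runs of entries and also honors the 0xFFFFE-dword packet limit from the removed loop:

/* Illustrative caller: each system page maps to a different GART
 * address, so build the final PTE value per page before emitting. */
static void write_system_ptes(struct amdgpu_ib *ib,
			      const dma_addr_t *pages_addr,
			      uint64_t pe, uint64_t addr,
			      unsigned count, uint32_t incr,
			      uint32_t flags)
{
	while (count--) {
		uint64_t value = amdgpu_vm_map_gart(pages_addr, addr) | flags;

		sdma_v3_0_vm_write_pte(ib, pe, value, 1, 0);
		pe += 8;
		addr += incr;
	}
}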
 
@@ -1049,40 +1000,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
  *
  * Update the page tables using sDMA (CIK).
  */
-static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
-                                    uint64_t pe,
+static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
                                     uint64_t addr, unsigned count,
                                     uint32_t incr, uint32_t flags)
 {
-       uint64_t value;
-       unsigned ndw;
-
-       while (count) {
-               ndw = count;
-               if (ndw > 0x7FFFF)
-                       ndw = 0x7FFFF;
-
-               if (flags & AMDGPU_PTE_VALID)
-                       value = addr;
-               else
-                       value = 0;
-
-               /* for physically contiguous pages (vram) */
-               ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
-               ib->ptr[ib->length_dw++] = pe; /* dst addr */
-               ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-               ib->ptr[ib->length_dw++] = flags; /* mask */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = value; /* value */
-               ib->ptr[ib->length_dw++] = upper_32_bits(value);
-               ib->ptr[ib->length_dw++] = incr; /* increment size */
-               ib->ptr[ib->length_dw++] = 0;
-               ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
-               pe += ndw * 8;
-               addr += ndw * incr;
-               count -= ndw;
-       }
+       /* for physically contiguous pages (vram) */
+       ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+       ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+       ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+       ib->ptr[ib->length_dw++] = flags; /* mask */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+       ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+       ib->ptr[ib->length_dw++] = incr; /* increment size */
+       ib->ptr[ib->length_dw++] = 0;
+       ib->ptr[ib->length_dw++] = count; /* number of entries */
 }
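
The same pattern applies here: the 0x7FFFF-entry cap of a single GEN_PTEPDE packet, and the old addr = 0 fallback for entries without AMDGPU_PTE_VALID, both become caller responsibilities. A sketch under those assumptions (wrapper name hypothetical):

/* Hypothetical wrapper restoring the old per-call behaviour. */
static void set_pte_pde_chunked(struct amdgpu_ib *ib, uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	if (!(flags & AMDGPU_PTE_VALID))
		addr = 0;	/* invalid entries must not point anywhere */

	while (count) {
		unsigned n = min(count, 0x7FFFFu);

		sdma_v3_0_vm_set_pte_pde(ib, pe, addr, n, incr, flags);
		pe += (uint64_t)n * 8;
		addr += (uint64_t)n * incr;
		count -= n;
	}
}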
 
 /**
@@ -1172,6 +1104,22 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                          SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
 }
 
+static unsigned sdma_v3_0_ring_get_emit_ib_size(struct amdgpu_ring *ring)
+{
+       return
+               7 + 6; /* sdma_v3_0_ring_emit_ib */
+}
+
+static unsigned sdma_v3_0_ring_get_dma_frame_size(struct amdgpu_ring *ring)
+{
+       return
+               6 + /* sdma_v3_0_ring_emit_hdp_flush */
+               3 + /* sdma_v3_0_ring_emit_hdp_invalidate */
+               6 + /* sdma_v3_0_ring_emit_pipeline_sync */
+               12 + /* sdma_v3_0_ring_emit_vm_flush */
+               10 + 10 + 10; /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
+}
+
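These two callbacks let common code reserve worst-case ring space per submission instead of relying on a hard-coded guess. A hedged sketch of a consumer, not the actual amdgpu_ib_schedule code:

/* Hypothetical consumer: reserve the fixed per-frame overhead plus
 * one emit_ib worth of dwords for every IB in the submission. */
static int reserve_frame_space(struct amdgpu_ring *ring, unsigned num_ibs)
{
	unsigned ndw = ring->funcs->get_dma_frame_size(ring) +
		       num_ibs * ring->funcs->get_emit_ib_size(ring);

	return amdgpu_ring_alloc(ring, ndw);
}
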
 static int sdma_v3_0_early_init(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1320,27 +1268,76 @@ static int sdma_v3_0_wait_for_idle(void *handle)
        return -ETIMEDOUT;
 }
 
-static int sdma_v3_0_soft_reset(void *handle)
+static bool sdma_v3_0_check_soft_reset(void *handle)
 {
-       u32 srbm_soft_reset = 0;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       u32 srbm_soft_reset = 0;
        u32 tmp = RREG32(mmSRBM_STATUS2);
 
-       if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
-               /* sdma0 */
-               tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
-               tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
-               WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
+       if ((tmp & SRBM_STATUS2__SDMA_BUSY_MASK) ||
+           (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)) {
                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
-       }
-       if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
-               /* sdma1 */
-               tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
-               tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
-               WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
        }
 
+       if (srbm_soft_reset) {
+               adev->sdma.srbm_soft_reset = srbm_soft_reset;
+               return true;
+       } else {
+               adev->sdma.srbm_soft_reset = 0;
+               return false;
+       }
+}
+
+static int sdma_v3_0_pre_soft_reset(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       u32 srbm_soft_reset = 0;
+
+       if (!adev->sdma.srbm_soft_reset)
+               return 0;
+
+       srbm_soft_reset = adev->sdma.srbm_soft_reset;
+
+       if (REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA) ||
+           REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1)) {
+               sdma_v3_0_ctx_switch_enable(adev, false);
+               sdma_v3_0_enable(adev, false);
+       }
+
+       return 0;
+}
+
+static int sdma_v3_0_post_soft_reset(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       u32 srbm_soft_reset = 0;
+
+       if (!adev->sdma.srbm_soft_reset)
+               return 0;
+
+       srbm_soft_reset = adev->sdma.srbm_soft_reset;
+
+       if (REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA) ||
+           REG_GET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1)) {
+               sdma_v3_0_gfx_resume(adev);
+               sdma_v3_0_rlc_resume(adev);
+       }
+
+       return 0;
+}
+
+static int sdma_v3_0_soft_reset(void *handle)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       u32 srbm_soft_reset = 0;
+       u32 tmp;
+
+       if (!adev->sdma.srbm_soft_reset)
+               return 0;
+
+       srbm_soft_reset = adev->sdma.srbm_soft_reset;
+
        if (srbm_soft_reset) {
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
@@ -1559,6 +1556,9 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
        .resume = sdma_v3_0_resume,
        .is_idle = sdma_v3_0_is_idle,
        .wait_for_idle = sdma_v3_0_wait_for_idle,
+       .check_soft_reset = sdma_v3_0_check_soft_reset,
+       .pre_soft_reset = sdma_v3_0_pre_soft_reset,
+       .post_soft_reset = sdma_v3_0_post_soft_reset,
        .soft_reset = sdma_v3_0_soft_reset,
        .set_clockgating_state = sdma_v3_0_set_clockgating_state,
        .set_powergating_state = sdma_v3_0_set_powergating_state,
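
With all four callbacks wired up, the core driver gains a standard hang-recovery sequence: check_soft_reset records which SDMA engines are stuck, pre_soft_reset halts them, soft_reset pulses SRBM_SOFT_RESET, and post_soft_reset brings the rings back up. Condensed into one illustrative call chain; the real loop in the device-level reset code iterates over every IP block:

/* Illustrative per-IP sequence; error handling omitted for brevity. */
static int sdma_v3_0_try_recover(void *handle)
{
	if (!sdma_v3_0_check_soft_reset(handle))
		return 0;	/* neither SDMA engine is hung */

	sdma_v3_0_pre_soft_reset(handle);	/* halt engines, ctx switch off */
	sdma_v3_0_soft_reset(handle);		/* pulse SRBM soft reset bits */
	return sdma_v3_0_post_soft_reset(handle); /* resume gfx and rlc rings */
}
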
@@ -1579,6 +1579,8 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
        .test_ib = sdma_v3_0_ring_test_ib,
        .insert_nop = sdma_v3_0_ring_insert_nop,
        .pad_ib = sdma_v3_0_ring_pad_ib,
+       .get_emit_ib_size = sdma_v3_0_ring_get_emit_ib_size,
+       .get_dma_frame_size = sdma_v3_0_ring_get_dma_frame_size,
 };
 
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)