drm/radeon: Add CP init for CIK (v7)
authorAlex Deucher <alexander.deucher@amd.com>
Wed, 19 Dec 2012 02:47:44 +0000 (21:47 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 25 Jun 2013 21:50:28 +0000 (17:50 -0400)
Sets up the GFX ring and loads ucode for GFX and Compute.

Todo:
- handle compute queue setup.

v2: add documentation
v3: integrate with latest reset changes
v4: additional init fixes
v5: scratch reg write back no longer supported on CIK
v6: properly set CP_RB0_BASE_HI
v7: rebase

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/radeon/Makefile
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/cik_blit_shaders.c [new file with mode: 0644]
drivers/gpu/drm/radeon/cik_blit_shaders.h [new file with mode: 0644]
drivers/gpu/drm/radeon/cikd.h
drivers/gpu/drm/radeon/radeon_cs.c

index 88d0601..292fd25 100644 (file)
@@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
        evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
        evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
        atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
-       si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o
+       si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
index 8eec582..5712526 100644 (file)
@@ -30,6 +30,7 @@
 #include "radeon_asic.h"
 #include "cikd.h"
 #include "atom.h"
+#include "cik_blit_shaders.h"
 
 /* GFX */
 #define CIK_PFP_UCODE_SIZE 2144
@@ -1491,6 +1492,400 @@ static void cik_gpu_init(struct radeon_device *rdev)
        udelay(50);
 }
 
+/*
+ * CP.
+ * On CIK, gfx and compute now have independent command processors.
+ *
+ * GFX
+ * Gfx consists of a single ring and can process both gfx jobs and
+ * compute jobs.  The gfx CP consists of three microengines (ME):
+ * PFP - Pre-Fetch Parser
+ * ME - Micro Engine
+ * CE - Constant Engine
+ * The PFP and ME make up what is considered the Drawing Engine (DE).
+ * The CE is an asynchronous engine used for updating buffer descriptors
+ * used by the DE so that they can be loaded into cache in parallel
+ * while the DE is processing state update packets.
+ *
+ * Compute
+ * The compute CP consists of two microengines (ME):
+ * MEC1 - Compute MicroEngine 1
+ * MEC2 - Compute MicroEngine 2
+ * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
+ * The queues are exposed to userspace and are programmed directly
+ * by the compute runtime.
+ */
+/**
+ * cik_cp_gfx_enable - enable/disable the gfx CP MEs
+ *
+ * @rdev: radeon_device pointer
+ * @enable: enable or disable the MEs
+ *
+ * Halts or unhalts the gfx MEs; halting also marks the gfx ring not ready.
+ */
+static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
+{
+       if (enable)
+               WREG32(CP_ME_CNTL, 0);
+       else {
+               WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
+               rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+       }
+       udelay(50);
+}
+
+/**
+ * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Loads the gfx PFP, ME, and CE ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
+{
+       const __be32 *fw_data;
+       int i;
+
+       if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
+               return -EINVAL;
+
+       cik_cp_gfx_enable(rdev, false);
+
+       /* PFP */
+       fw_data = (const __be32 *)rdev->pfp_fw->data;
+       WREG32(CP_PFP_UCODE_ADDR, 0);
+       for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
+               WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
+       WREG32(CP_PFP_UCODE_ADDR, 0);
+
+       /* CE */
+       fw_data = (const __be32 *)rdev->ce_fw->data;
+       WREG32(CP_CE_UCODE_ADDR, 0);
+       for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
+               WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
+       WREG32(CP_CE_UCODE_ADDR, 0);
+
+       /* ME */
+       fw_data = (const __be32 *)rdev->me_fw->data;
+       WREG32(CP_ME_RAM_WADDR, 0);
+       for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
+               WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
+       WREG32(CP_ME_RAM_WADDR, 0);
+
+       WREG32(CP_PFP_UCODE_ADDR, 0);
+       WREG32(CP_CE_UCODE_ADDR, 0);
+       WREG32(CP_ME_RAM_WADDR, 0);
+       WREG32(CP_ME_RAM_RADDR, 0);
+       return 0;
+}
+
+/**
+ * cik_cp_gfx_start - start the gfx ring
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Unhalts the gfx MEs, then queues the CE partition setup, the clear
+ * state (cik_default_state) and a final context register write.
+ * Returns 0 for success, or the radeon_ring_lock() error on failure.
+ */
+static int cik_cp_gfx_start(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       int r, i;
+
+       /* init the CP */
+       WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
+       WREG32(CP_ENDIAN_SWAP, 0);
+       WREG32(CP_DEVICE_ID, 1);
+
+       cik_cp_gfx_enable(rdev, true);
+
+       r = radeon_ring_lock(rdev, ring, cik_default_size + 17); /* state table + 17 fixed dwords written below */
+       if (r) {
+               DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
+               return r;
+       }
+
+       /* init the CE partitions.  CE only used for gfx on CIK */
+       radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
+       radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
+       radeon_ring_write(ring, 0xc000);
+       radeon_ring_write(ring, 0xc000);
+
+       /* setup clear context state */
+       radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+       radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+
+       radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+       radeon_ring_write(ring, 0x80000000);
+       radeon_ring_write(ring, 0x80000000);
+
+       for (i = 0; i < cik_default_size; i++)
+               radeon_ring_write(ring, cik_default_state[i]);
+
+       radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
+       radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+
+       /* set clear context state */
+       radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+       radeon_ring_write(ring, 0);
+
+       radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
+       radeon_ring_write(ring, 0x00000316);
+       radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+       radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
+
+       radeon_ring_unlock_commit(rdev, ring);
+
+       return 0;
+}
+
+/**
+ * cik_cp_gfx_fini - stop the gfx ring
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the gfx ring and tear down the driver ring
+ * info.
+ */
+static void cik_cp_gfx_fini(struct radeon_device *rdev)
+{
+       cik_cp_gfx_enable(rdev, false);
+       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+}
+
+/**
+ * cik_cp_gfx_resume - setup the gfx ring buffer registers
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Program the location and size of the gfx ring buffer
+ * and test it to make sure it's working.
+ * Returns 0 for success, error for failure.
+ */
+static int cik_cp_gfx_resume(struct radeon_device *rdev)
+{
+       struct radeon_ring *ring;
+       u32 tmp;
+       u32 rb_bufsz;
+       u64 rb_addr;
+       int r;
+
+       WREG32(CP_SEM_WAIT_TIMER, 0x0);
+       WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
+
+       /* Set the write pointer delay */
+       WREG32(CP_RB_WPTR_DELAY, 0);
+
+       /* set the RB to use vmid 0 */
+       WREG32(CP_RB_VMID, 0);
+
+       WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
+
+       /* ring 0 - compute and gfx */
+       /* Set ring buffer size */
+       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       rb_bufsz = drm_order(ring->ring_size / 8);
+       tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+#ifdef __BIG_ENDIAN
+       tmp |= BUF_SWAP_32BIT;
+#endif
+       WREG32(CP_RB0_CNTL, tmp);
+
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
+       ring->wptr = 0;
+       WREG32(CP_RB0_WPTR, ring->wptr);
+
+       /* set the wb address wether it's enabled or not */
+       WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
+       WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
+
+       /* scratch register shadowing is no longer supported */
+       WREG32(SCRATCH_UMSK, 0);
+
+       if (!rdev->wb.enabled)
+               tmp |= RB_NO_UPDATE;
+
+       mdelay(1);
+       WREG32(CP_RB0_CNTL, tmp);
+
+       rb_addr = ring->gpu_addr >> 8;
+       WREG32(CP_RB0_BASE, rb_addr);
+       WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
+
+       ring->rptr = RREG32(CP_RB0_RPTR);
+
+       /* start the ring */
+       cik_cp_gfx_start(rdev);
+       rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
+       r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+       if (r) {
+               rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+               return r;
+       }
+       return 0;
+}
+
+/**
+ * cik_cp_compute_enable - enable/disable the compute CP MEs
+ *
+ * @rdev: radeon_device pointer
+ * @enable: enable or disable the MEs
+ *
+ * Halts or unhalts the compute MEs.
+ */
+static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
+{
+       if (enable)
+               WREG32(CP_MEC_CNTL, 0);
+       else
+               WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
+       udelay(50);
+}
+
+/**
+ * cik_cp_compute_load_microcode - load the compute CP ME ucode
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Loads the MEC1 ucode and, on Kaveri only, the same image into MEC2.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
+{
+       const __be32 *fw_data;
+       int i;
+
+       if (!rdev->mec_fw)
+               return -EINVAL;
+
+       cik_cp_compute_enable(rdev, false);
+
+       /* MEC1 */
+       fw_data = (const __be32 *)rdev->mec_fw->data;
+       WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
+       for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
+               WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
+       WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
+
+       if (rdev->family == CHIP_KAVERI) {
+               /* MEC2 */
+               fw_data = (const __be32 *)rdev->mec_fw->data;
+               WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
+               for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
+                       WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
+               WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
+       }
+
+       return 0;
+}
+
+/**
+ * cik_cp_compute_start - start the compute queues
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Enable the compute queues.
+ * Returns 0 for success, error for failure.
+ */
+static int cik_cp_compute_start(struct radeon_device *rdev)
+{
+       //todo
+       return 0;
+}
+
+/**
+ * cik_cp_compute_fini - stop the compute queues
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the compute queues and tear down the driver queue
+ * info.
+ */
+static void cik_cp_compute_fini(struct radeon_device *rdev)
+{
+       cik_cp_compute_enable(rdev, false);
+       //todo
+}
+
+/**
+ * cik_cp_compute_resume - setup the compute queue registers
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int cik_cp_compute_resume(struct radeon_device *rdev)
+{
+       int r;
+
+       //todo
+       r = cik_cp_compute_start(rdev);
+       if (r)
+               return r;
+       return 0;
+}
+
+/* XXX temporary wrappers to handle both compute and gfx */
+/* XXX */
+static void cik_cp_enable(struct radeon_device *rdev, bool enable)
+{
+       cik_cp_gfx_enable(rdev, enable);
+       cik_cp_compute_enable(rdev, enable);
+}
+
+/* XXX */
+static int cik_cp_load_microcode(struct radeon_device *rdev)
+{
+       int r;
+
+       r = cik_cp_gfx_load_microcode(rdev);
+       if (r)
+               return r;
+       r = cik_cp_compute_load_microcode(rdev);
+       if (r)
+               return r;
+
+       return 0;
+}
+
+/* XXX */
+static void cik_cp_fini(struct radeon_device *rdev)
+{
+       cik_cp_gfx_fini(rdev);
+       cik_cp_compute_fini(rdev);
+}
+
+/* XXX */
+static int cik_cp_resume(struct radeon_device *rdev)
+{
+       int r;
+
+       /* Reset all cp blocks */
+       WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
+       RREG32(GRBM_SOFT_RESET);
+       mdelay(15);
+       WREG32(GRBM_SOFT_RESET, 0);
+       RREG32(GRBM_SOFT_RESET);
+
+       r = cik_cp_load_microcode(rdev);
+       if (r)
+               return r;
+
+       r = cik_cp_gfx_resume(rdev);
+       if (r)
+               return r;
+       r = cik_cp_compute_resume(rdev);
+       if (r)
+               return r;
+
+       return 0;
+}
+
 /**
  * cik_gpu_is_lockup - check if the 3D engine is locked up
  *
diff --git a/drivers/gpu/drm/radeon/cik_blit_shaders.c b/drivers/gpu/drm/radeon/cik_blit_shaders.c
new file mode 100644 (file)
index 0000000..ff13118
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *     Alex Deucher <alexander.deucher@amd.com>
+ */
+
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+
+const u32 cik_default_state[] =
+{
+       0xc0066900,
+       0x00000000,
+       0x00000060, /* DB_RENDER_CONTROL */
+       0x00000000, /* DB_COUNT_CONTROL */
+       0x00000000, /* DB_DEPTH_VIEW */
+       0x0000002a, /* DB_RENDER_OVERRIDE */
+       0x00000000, /* DB_RENDER_OVERRIDE2 */
+       0x00000000, /* DB_HTILE_DATA_BASE */
+
+       0xc0046900,
+       0x00000008,
+       0x00000000, /* DB_DEPTH_BOUNDS_MIN */
+       0x00000000, /* DB_DEPTH_BOUNDS_MAX */
+       0x00000000, /* DB_STENCIL_CLEAR */
+       0x00000000, /* DB_DEPTH_CLEAR */
+
+       0xc0036900,
+       0x0000000f,
+       0x00000000, /* DB_DEPTH_INFO */
+       0x00000000, /* DB_Z_INFO */
+       0x00000000, /* DB_STENCIL_INFO */
+
+       0xc0016900,
+       0x00000080,
+       0x00000000, /* PA_SC_WINDOW_OFFSET */
+
+       0xc00d6900,
+       0x00000083,
+       0x0000ffff, /* PA_SC_CLIPRECT_RULE */
+       0x00000000, /* PA_SC_CLIPRECT_0_TL */
+       0x20002000, /* PA_SC_CLIPRECT_0_BR */
+       0x00000000,
+       0x20002000,
+       0x00000000,
+       0x20002000,
+       0x00000000,
+       0x20002000,
+       0xaaaaaaaa, /* PA_SC_EDGERULE */
+       0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
+       0x0000000f, /* CB_TARGET_MASK */
+       0x0000000f, /* CB_SHADER_MASK */
+
+       0xc0226900,
+       0x00000094,
+       0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
+       0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x80000000,
+       0x20002000,
+       0x00000000, /* PA_SC_VPORT_ZMIN_0 */
+       0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
+
+       0xc0046900,
+       0x00000100,
+       0xffffffff, /* VGT_MAX_VTX_INDX */
+       0x00000000, /* VGT_MIN_VTX_INDX */
+       0x00000000, /* VGT_INDX_OFFSET */
+       0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
+
+       0xc0046900,
+       0x00000105,
+       0x00000000, /* CB_BLEND_RED */
+       0x00000000, /* CB_BLEND_GREEN */
+       0x00000000, /* CB_BLEND_BLUE */
+       0x00000000, /* CB_BLEND_ALPHA */
+
+       0xc0016900,
+       0x000001e0,
+       0x00000000, /* CB_BLEND0_CONTROL */
+
+       0xc00c6900,
+       0x00000200,
+       0x00000000, /* DB_DEPTH_CONTROL */
+       0x00000000, /* DB_EQAA */
+       0x00cc0010, /* CB_COLOR_CONTROL */
+       0x00000210, /* DB_SHADER_CONTROL */
+       0x00010000, /* PA_CL_CLIP_CNTL */
+       0x00000004, /* PA_SU_SC_MODE_CNTL */
+       0x00000100, /* PA_CL_VTE_CNTL */
+       0x00000000, /* PA_CL_VS_OUT_CNTL */
+       0x00000000, /* PA_CL_NANINF_CNTL */
+       0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
+       0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
+       0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
+
+       0xc0116900,
+       0x00000280,
+       0x00000000, /* PA_SU_POINT_SIZE */
+       0x00000000, /* PA_SU_POINT_MINMAX */
+       0x00000008, /* PA_SU_LINE_CNTL */
+       0x00000000, /* PA_SC_LINE_STIPPLE */
+       0x00000000, /* VGT_OUTPUT_PATH_CNTL */
+       0x00000000, /* VGT_HOS_CNTL */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000, /* VGT_GS_MODE */
+
+       0xc0026900,
+       0x00000292,
+       0x00000000, /* PA_SC_MODE_CNTL_0 */
+       0x00000000, /* PA_SC_MODE_CNTL_1 */
+
+       0xc0016900,
+       0x000002a1,
+       0x00000000, /* VGT_PRIMITIVEID_EN */
+
+       0xc0016900,
+       0x000002a5,
+       0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
+
+       0xc0026900,
+       0x000002a8,
+       0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
+       0x00000000,
+
+       0xc0026900,
+       0x000002ad,
+       0x00000000, /* VGT_REUSE_OFF */
+       0x00000000,
+
+       0xc0016900,
+       0x000002d5,
+       0x00000000, /* VGT_SHADER_STAGES_EN */
+
+       0xc0016900,
+       0x000002dc,
+       0x0000aa00, /* DB_ALPHA_TO_MASK */
+
+       0xc0066900,
+       0x000002de,
+       0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+
+       0xc0026900,
+       0x000002e5,
+       0x00000000, /* VGT_STRMOUT_CONFIG */
+       0x00000000,
+
+       0xc01b6900,
+       0x000002f5,
+       0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
+       0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
+       0x00000000, /* PA_SC_LINE_CNTL */
+       0x00000000, /* PA_SC_AA_CONFIG */
+       0x00000005, /* PA_SU_VTX_CNTL */
+       0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
+       0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
+       0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
+       0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
+       0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0x00000000,
+       0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
+       0xffffffff,
+
+       0xc0026900,
+       0x00000316,
+       0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
+       0x00000010, /* VGT_OUT_DEALLOC_CNTL */
+};
+
+const u32 cik_default_size = ARRAY_SIZE(cik_default_state);
diff --git a/drivers/gpu/drm/radeon/cik_blit_shaders.h b/drivers/gpu/drm/radeon/cik_blit_shaders.h
new file mode 100644 (file)
index 0000000..dfe7314
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef CIK_BLIT_SHADERS_H
+#define CIK_BLIT_SHADERS_H
+
+extern const u32 cik_default_state[];
+
+extern const u32 cik_default_size;
+
+#endif
index 2300ae0..0d1a298 100644 (file)
 #define                MEC_ME2_HALT                                    (1 << 28)
 #define                MEC_ME1_HALT                                    (1 << 30)
 
+#define CP_MEC_CNTL                                    0x8234
+#define                MEC_ME2_HALT                                    (1 << 28)
+#define                MEC_ME1_HALT                                    (1 << 30)
+
 #define CP_ME_CNTL                                     0x86D8
 #define                CP_CE_HALT                                      (1 << 24)
 #define                CP_PFP_HALT                                     (1 << 26)
 #define                CP_ME_HALT                                      (1 << 28)
 
+#define        CP_RB0_RPTR                                     0x8700
+#define        CP_RB_WPTR_DELAY                                0x8704
+
 #define CP_MEQ_THRESHOLDS                              0x8764
 #define                MEQ1_START(x)                           ((x) << 0)
 #define                MEQ2_START(x)                           ((x) << 8)
 #define        TC_CFG_L1_VOLATILE                              0xAC88
 #define        TC_CFG_L2_VOLATILE                              0xAC8C
 
+#define        CP_RB0_BASE                                     0xC100
+#define        CP_RB0_CNTL                                     0xC104
+#define                RB_BUFSZ(x)                                     ((x) << 0)
+#define                RB_BLKSZ(x)                                     ((x) << 8)
+#define                BUF_SWAP_32BIT                                  (2 << 16)
+#define                RB_NO_UPDATE                                    (1 << 27)
+#define                RB_RPTR_WR_ENA                                  (1 << 31)
+
+#define        CP_RB0_RPTR_ADDR                                0xC10C
+#define                RB_RPTR_SWAP_32BIT                              (2 << 0)
+#define        CP_RB0_RPTR_ADDR_HI                             0xC110
+#define        CP_RB0_WPTR                                     0xC114
+
+#define        CP_DEVICE_ID                                    0xC12C
+#define        CP_ENDIAN_SWAP                                  0xC140
+#define        CP_RB_VMID                                      0xC144
+
+#define        CP_PFP_UCODE_ADDR                               0xC150
+#define        CP_PFP_UCODE_DATA                               0xC154
+#define        CP_ME_RAM_RADDR                                 0xC158
+#define        CP_ME_RAM_WADDR                                 0xC15C
+#define        CP_ME_RAM_DATA                                  0xC160
+
+#define        CP_CE_UCODE_ADDR                                0xC168
+#define        CP_CE_UCODE_DATA                                0xC16C
+#define        CP_MEC_ME1_UCODE_ADDR                           0xC170
+#define        CP_MEC_ME1_UCODE_DATA                           0xC174
+#define        CP_MEC_ME2_UCODE_ADDR                           0xC178
+#define        CP_MEC_ME2_UCODE_DATA                           0xC17C
+
+#define        CP_MAX_CONTEXT                                  0xC2B8
+
+#define        CP_RB0_BASE_HI                                  0xC2C4
+
 #define PA_SC_RASTER_CONFIG                             0x28350
 #       define RASTER_CONFIG_RB_MAP_0                   0
 #       define RASTER_CONFIG_RB_MAP_1                   1
 #       define RASTER_CONFIG_RB_MAP_2                   2
 #       define RASTER_CONFIG_RB_MAP_3                   3
 
+#define        SCRATCH_REG0                                    0x30100
+#define        SCRATCH_REG1                                    0x30104
+#define        SCRATCH_REG2                                    0x30108
+#define        SCRATCH_REG3                                    0x3010C
+#define        SCRATCH_REG4                                    0x30110
+#define        SCRATCH_REG5                                    0x30114
+#define        SCRATCH_REG6                                    0x30118
+#define        SCRATCH_REG7                                    0x3011C
+
+#define        SCRATCH_UMSK                                    0x30140
+#define        SCRATCH_ADDR                                    0x30144
+
+#define        CP_SEM_WAIT_TIMER                               0x301BC
+
+#define        CP_SEM_INCOMPLETE_TIMER_CNTL                    0x301C8
+
 #define GRBM_GFX_INDEX                                 0x30800
 #define                INSTANCE_INDEX(x)                       ((x) << 0)
 #define                SH_INDEX(x)                             ((x) << 8)
 #define                TCC_DISABLE_MASK                                0xFFFF0000
 #define                TCC_DISABLE_SHIFT                               16
 
+/*
+ * PM4
+ */
+#define        PACKET_TYPE0    0
+#define        PACKET_TYPE1    1
+#define        PACKET_TYPE2    2
+#define        PACKET_TYPE3    3
+
+#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
+#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
+#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2)
+#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
+#define PACKET0(reg, n)        ((PACKET_TYPE0 << 30) |                         \
+                        (((reg) >> 2) & 0xFFFF) |                      \
+                        ((n) & 0x3FFF) << 16)
+#define CP_PACKET2                     0x80000000
+#define                PACKET2_PAD_SHIFT               0
+#define                PACKET2_PAD_MASK                (0x3fffffff << 0)
+
+#define PACKET2(v)     (CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
+
+#define PACKET3(op, n) ((PACKET_TYPE3 << 30) |                         \
+                        (((op) & 0xFF) << 8) |                         \
+                        ((n) & 0x3FFF) << 16)
+
+#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1)
+
+/* Packet 3 types */
+#define        PACKET3_NOP                                     0x10
+#define        PACKET3_SET_BASE                                0x11
+#define                PACKET3_BASE_INDEX(x)                  ((x) << 0)
+#define                        CE_PARTITION_BASE               3
+#define        PACKET3_CLEAR_STATE                             0x12
+#define        PACKET3_INDEX_BUFFER_SIZE                       0x13
+#define        PACKET3_DISPATCH_DIRECT                         0x15
+#define        PACKET3_DISPATCH_INDIRECT                       0x16
+#define        PACKET3_ATOMIC_GDS                              0x1D
+#define        PACKET3_ATOMIC_MEM                              0x1E
+#define        PACKET3_OCCLUSION_QUERY                         0x1F
+#define        PACKET3_SET_PREDICATION                         0x20
+#define        PACKET3_REG_RMW                                 0x21
+#define        PACKET3_COND_EXEC                               0x22
+#define        PACKET3_PRED_EXEC                               0x23
+#define        PACKET3_DRAW_INDIRECT                           0x24
+#define        PACKET3_DRAW_INDEX_INDIRECT                     0x25
+#define        PACKET3_INDEX_BASE                              0x26
+#define        PACKET3_DRAW_INDEX_2                            0x27
+#define        PACKET3_CONTEXT_CONTROL                         0x28
+#define        PACKET3_INDEX_TYPE                              0x2A
+#define        PACKET3_DRAW_INDIRECT_MULTI                     0x2C
+#define        PACKET3_DRAW_INDEX_AUTO                         0x2D
+#define        PACKET3_NUM_INSTANCES                           0x2F
+#define        PACKET3_DRAW_INDEX_MULTI_AUTO                   0x30
+#define        PACKET3_INDIRECT_BUFFER_CONST                   0x33
+#define        PACKET3_STRMOUT_BUFFER_UPDATE                   0x34
+#define        PACKET3_DRAW_INDEX_OFFSET_2                     0x35
+#define        PACKET3_DRAW_PREAMBLE                           0x36
+#define        PACKET3_WRITE_DATA                              0x37
+#define        PACKET3_DRAW_INDEX_INDIRECT_MULTI               0x38
+#define        PACKET3_MEM_SEMAPHORE                           0x39
+#define        PACKET3_COPY_DW                                 0x3B
+#define        PACKET3_WAIT_REG_MEM                            0x3C
+#define        PACKET3_INDIRECT_BUFFER                         0x3F
+#define        PACKET3_COPY_DATA                               0x40
+#define        PACKET3_PFP_SYNC_ME                             0x42
+#define        PACKET3_SURFACE_SYNC                            0x43 /* single-bit flags follow; presumably this packet's -- confirm */
+#              define PACKET3_DEST_BASE_0_ENA      (1 << 0)
+#              define PACKET3_DEST_BASE_1_ENA      (1 << 1)
+#              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6) /* bits 2-5: no define here */
+#              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
+#              define PACKET3_CB2_DEST_BASE_ENA    (1 << 8)
+#              define PACKET3_CB3_DEST_BASE_ENA    (1 << 9)
+#              define PACKET3_CB4_DEST_BASE_ENA    (1 << 10)
+#              define PACKET3_CB5_DEST_BASE_ENA    (1 << 11)
+#              define PACKET3_CB6_DEST_BASE_ENA    (1 << 12)
+#              define PACKET3_CB7_DEST_BASE_ENA    (1 << 13)
+#              define PACKET3_DB_DEST_BASE_ENA     (1 << 14)
+#              define PACKET3_TCL1_VOL_ACTION_ENA  (1 << 15)
+#              define PACKET3_TC_VOL_ACTION_ENA    (1 << 16) /* L2 */
+#              define PACKET3_TC_WB_ACTION_ENA     (1 << 18) /* L2; bit 17: no define here */
+#              define PACKET3_DEST_BASE_2_ENA      (1 << 19)
+#              define PACKET3_DEST_BASE_3_ENA      (1 << 21) /* bit 20: no define here */
+#              define PACKET3_TCL1_ACTION_ENA      (1 << 22)
+#              define PACKET3_TC_ACTION_ENA        (1 << 23) /* L2 */
+#              define PACKET3_CB_ACTION_ENA        (1 << 25) /* bit 24: no define here */
+#              define PACKET3_DB_ACTION_ENA        (1 << 26)
+#              define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
+#              define PACKET3_SH_KCACHE_VOL_ACTION_ENA (1 << 28)
+#              define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29) /* highest bit defined in this group */
+#define        PACKET3_COND_WRITE                              0x45
+#define        PACKET3_EVENT_WRITE                             0x46
+#define                EVENT_TYPE(x)                           ((x) << 0) /* x placed from bit 0 up */
+#define                EVENT_INDEX(x)                          ((x) << 8) /* x placed from bit 8 up; x values in the list that follows */
+                /* 0 - any non-TS event
+                * 1 - ZPASS_DONE, PIXEL_PIPE_STAT_*
+                * 2 - SAMPLE_PIPELINESTAT
+                * 3 - SAMPLE_STREAMOUTSTAT*
+                * 4 - *S_PARTIAL_FLUSH
+                * 5 - EOP events
+                * 6 - EOS events
+                */
+#define        PACKET3_EVENT_WRITE_EOP                         0x47 /* indented defines below: flag bits and shifted fields, each value list follows its macro */
+#define                EOP_TCL1_VOL_ACTION_EN                  (1 << 12)
+#define                EOP_TC_VOL_ACTION_EN                    (1 << 13) /* L2 */
+#define                EOP_TC_WB_ACTION_EN                     (1 << 15) /* L2 */
+#define                EOP_TCL1_ACTION_EN                      (1 << 16)
+#define                EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
+#define                CACHE_POLICY(x)                         ((x) << 25)
+                /* 0 - LRU
+                * 1 - Stream
+                * 2 - Bypass
+                */
+#define                TCL2_VOLATILE                           (1 << 27)
+#define                DATA_SEL(x)                             ((unsigned int)(x) << 29)
+                /* 0 - discard
+                * 1 - send low 32bit data
+                * 2 - send 64bit data
+                * 3 - send 64bit GPU counter value
+                * 4 - send 64bit sys counter value
+                * (unsigned cast above: 4 << 29 needs bit 31, which a signed int shift cannot represent) */
+#define                INT_SEL(x)                              ((x) << 24)
+                /* 0 - none
+                * 1 - interrupt only (DATA_SEL = 0)
+                * 2 - interrupt when data write is confirmed
+                * (value 2 sets bit 25, where CACHE_POLICY starts: presumably a different word -- confirm) */
+#define                DST_SEL(x)                              ((x) << 16)
+                /* 0 - MC
+                * 1 - TC/L2
+                * (bit 16 is also EOP_TCL1_ACTION_EN: presumably a different word -- confirm) */
+#define        PACKET3_EVENT_WRITE_EOS                         0x48
+#define        PACKET3_RELEASE_MEM                             0x49
+#define        PACKET3_PREAMBLE_CNTL                           0x4A
+#              define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE     (2 << 28) /* multi-bit code, not a single flag: sets bit 29 */
+#              define PACKET3_PREAMBLE_END_CLEAR_STATE       (3 << 28) /* multi-bit code: sets bits 28 and 29 */
+#define        PACKET3_DMA_DATA                                0x50
+#define        PACKET3_AQUIRE_MEM                              0x58 /* NOTE(review): "AQUIRE" is a misspelling of ACQUIRE; renaming would change the macro name users reference */
+#define        PACKET3_REWIND                                  0x59
+#define        PACKET3_LOAD_UCONFIG_REG                        0x5E
+#define        PACKET3_LOAD_SH_REG                             0x5F
+#define        PACKET3_LOAD_CONFIG_REG                         0x60
+#define        PACKET3_LOAD_CONTEXT_REG                        0x61
+#define        PACKET3_SET_CONFIG_REG                          0x68 /* _START/_END below: presumably the register offset range for this packet; END inclusive or not is not shown -- confirm */
+#define                PACKET3_SET_CONFIG_REG_START                    0x00008000
+#define                PACKET3_SET_CONFIG_REG_END                      0x0000b000
+#define        PACKET3_SET_CONTEXT_REG                         0x69
+#define                PACKET3_SET_CONTEXT_REG_START                   0x00028000
+#define                PACKET3_SET_CONTEXT_REG_END                     0x00029000
+#define        PACKET3_SET_CONTEXT_REG_INDIRECT                0x73
+#define        PACKET3_SET_SH_REG                              0x76
+#define                PACKET3_SET_SH_REG_START                        0x0000b000 /* same value as PACKET3_SET_CONFIG_REG_END */
+#define                PACKET3_SET_SH_REG_END                          0x0000c000
+#define        PACKET3_SET_SH_REG_OFFSET                       0x77
+#define        PACKET3_SET_QUEUE_REG                           0x78
+#define        PACKET3_SET_UCONFIG_REG                         0x79 /* no _START/_END pair is defined for this one here */
+#define        PACKET3_SCRATCH_RAM_WRITE                       0x7D
+#define        PACKET3_SCRATCH_RAM_READ                        0x7E
+#define        PACKET3_LOAD_CONST_RAM                          0x80
+#define        PACKET3_WRITE_CONST_RAM                         0x81
+#define        PACKET3_DUMP_CONST_RAM                          0x83
+#define        PACKET3_INCREMENT_CE_COUNTER                    0x84
+#define        PACKET3_INCREMENT_DE_COUNTER                    0x85
+#define        PACKET3_WAIT_ON_CE_COUNTER                      0x86
+#define        PACKET3_WAIT_ON_DE_COUNTER_DIFF                 0x88
+
+
 #endif
index 7e265a5..cf71734 100644 (file)
@@ -121,7 +121,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
                p->ring = RADEON_RING_TYPE_GFX_INDEX;
                break;
        case RADEON_CS_RING_COMPUTE:
-               if (p->rdev->family >= CHIP_TAHITI) {
+               if (p->rdev->family >= CHIP_BONAIRE)
+                       p->ring = RADEON_RING_TYPE_GFX_INDEX;
+               else if (p->rdev->family >= CHIP_TAHITI) {
                        if (p->priority > 0)
                                p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
                        else