1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
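/*
 * Local copies of the RLC_CGTT_MGCG_OVERRIDE field masks; the clockgating
 * update helpers later in this file use them to set or clear override bits
 * in mmRLC_CGTT_MGCG_OVERRIDE.
 */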
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address */
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX
90 };
91
92 #define RLC_FormatDirectRegListLength        14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
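/*
 * Per-VMID GDS register offsets: the GDS base/size pair plus the GWS and OA
 * registers for each of the 16 VMIDs, indexed by vmid when a ring emits a
 * GDS switch.
 */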
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
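/*
 * The register tables below ("golden" settings and MGCG/CGCG init sequences)
 * are flat arrays of (register, and-mask, value) triplets consumed by
 * amdgpu_program_register_sequence().  Roughly, each triplet is applied as in
 * the sketch below (illustrative only -- the real helper lives in
 * amdgpu_device.c):
 *
 *	if (and_mask == 0xffffffff)
 *		tmp = value;
 *	else
 *		tmp = (RREG32(reg) & ~and_mask) | value;
 *	WREG32(reg, tmp);
 */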
161 static const u32 golden_settings_tonga_a11[] =
162 {
163         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166         mmGB_GPU_ID, 0x0000000f, 0x00000000,
167         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
180 static const u32 tonga_golden_common_all[] =
181 {
182         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283         mmSQ_CONFIG, 0x07f80000, 0x01180000,
284         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
290 };
291
292 static const u32 polaris11_golden_common_all[] =
293 {
294         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 };
301
302 static const u32 golden_settings_polaris10_a11[] =
303 {
304         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315         mmSQ_CONFIG, 0x07f80000, 0x07180000,
316         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322
323 static const u32 polaris10_golden_common_all[] =
324 {
325         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
333 };
334
335 static const u32 fiji_golden_common_all[] =
336 {
337         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
347 };
348
349 static const u32 golden_settings_fiji_a10[] =
350 {
351         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
362 };
363
364 static const u32 fiji_mgcg_cgcg_init[] =
365 {
366         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
401 };
402
403 static const u32 golden_settings_iceland_a11[] =
404 {
405         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408         mmGB_GPU_ID, 0x0000000f, 0x00000000,
409         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
421 };
422
423 static const u32 iceland_golden_common_all[] =
424 {
425         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
433 };
434
435 static const u32 iceland_mgcg_cgcg_init[] =
436 {
437         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
501 };
502
503 static const u32 cz_golden_settings_a11[] =
504 {
505         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507         mmGB_GPU_ID, 0x0000000f, 0x00000000,
508         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
517 };
518
519 static const u32 cz_golden_common_all[] =
520 {
521         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
529 };
530
531 static const u32 cz_mgcg_cgcg_init[] =
532 {
533         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
608 };
609
610 static const u32 stoney_golden_settings_a11[] =
611 {
612         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613         mmGB_GPU_ID, 0x0000000f, 0x00000000,
614         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
622 };
623
624 static const u32 stoney_golden_common_all[] =
625 {
626         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
628         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
629         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
630         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
631         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
632         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
633         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
634 };
635
636 static const u32 stoney_mgcg_cgcg_init[] =
637 {
638         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
639         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
640         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
642         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
643         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
644 };
645
646 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
650 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
651 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652
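/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" settings
 *
 * Applies the clockgating init and golden register sequences that match the
 * detected ASIC.  Polaris10 additionally gets a WREG32_SMC(ixCG_ACLK_CNTL, ...)
 * write and, for a handful of boards identified by PCI subsystem IDs, an
 * extra I2C transaction.
 */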
653 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
654 {
655         switch (adev->asic_type) {
656         case CHIP_TOPAZ:
657                 amdgpu_program_register_sequence(adev,
658                                                  iceland_mgcg_cgcg_init,
659                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
660                 amdgpu_program_register_sequence(adev,
661                                                  golden_settings_iceland_a11,
662                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
663                 amdgpu_program_register_sequence(adev,
664                                                  iceland_golden_common_all,
665                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
666                 break;
667         case CHIP_FIJI:
668                 amdgpu_program_register_sequence(adev,
669                                                  fiji_mgcg_cgcg_init,
670                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
671                 amdgpu_program_register_sequence(adev,
672                                                  golden_settings_fiji_a10,
673                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
674                 amdgpu_program_register_sequence(adev,
675                                                  fiji_golden_common_all,
676                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
677                 break;
678
679         case CHIP_TONGA:
680                 amdgpu_program_register_sequence(adev,
681                                                  tonga_mgcg_cgcg_init,
682                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
683                 amdgpu_program_register_sequence(adev,
684                                                  golden_settings_tonga_a11,
685                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
686                 amdgpu_program_register_sequence(adev,
687                                                  tonga_golden_common_all,
688                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
689                 break;
690         case CHIP_POLARIS11:
691                 amdgpu_program_register_sequence(adev,
692                                                  golden_settings_polaris11_a11,
693                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
694                 amdgpu_program_register_sequence(adev,
695                                                  polaris11_golden_common_all,
696                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
697                 break;
698         case CHIP_POLARIS10:
699                 amdgpu_program_register_sequence(adev,
700                                                  golden_settings_polaris10_a11,
701                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
702                 amdgpu_program_register_sequence(adev,
703                                                  polaris10_golden_common_all,
704                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
705                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
706                 if (adev->pdev->revision == 0xc7 &&
707                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
708                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
709                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
710                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
711                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
712                 }
713                 break;
714         case CHIP_CARRIZO:
715                 amdgpu_program_register_sequence(adev,
716                                                  cz_mgcg_cgcg_init,
717                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
718                 amdgpu_program_register_sequence(adev,
719                                                  cz_golden_settings_a11,
720                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
721                 amdgpu_program_register_sequence(adev,
722                                                  cz_golden_common_all,
723                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
724                 break;
725         case CHIP_STONEY:
726                 amdgpu_program_register_sequence(adev,
727                                                  stoney_mgcg_cgcg_init,
728                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
729                 amdgpu_program_register_sequence(adev,
730                                                  stoney_golden_settings_a11,
731                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
732                 amdgpu_program_register_sequence(adev,
733                                                  stoney_golden_common_all,
734                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
735                 break;
736         default:
737                 break;
738         }
739 }
740
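/*
 * gfx_v8_0_scratch_init - set up the CP scratch register pool
 *
 * Seven scratch registers starting at mmSCRATCH_REG0 are marked free so that
 * amdgpu_gfx_scratch_get() can hand them out to the ring and IB tests below.
 */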
741 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
742 {
743         int i;
744
745         adev->gfx.scratch.num_reg = 7;
746         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
747         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
748                 adev->gfx.scratch.free[i] = true;
749                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
750         }
751 }
752
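/*
 * gfx_v8_0_ring_test_ring - basic liveness test for a CP ring
 *
 * Seeds a scratch register with 0xCAFEDEAD, emits a SET_UCONFIG_REG packet
 * that writes 0xDEADBEEF to the same register, and polls until the new value
 * lands or adev->usec_timeout expires.
 */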
753 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
754 {
755         struct amdgpu_device *adev = ring->adev;
756         uint32_t scratch;
757         uint32_t tmp = 0;
758         unsigned i;
759         int r;
760
761         r = amdgpu_gfx_scratch_get(adev, &scratch);
762         if (r) {
763                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
764                 return r;
765         }
766         WREG32(scratch, 0xCAFEDEAD);
767         r = amdgpu_ring_alloc(ring, 3);
768         if (r) {
769                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
770                           ring->idx, r);
771                 amdgpu_gfx_scratch_free(adev, scratch);
772                 return r;
773         }
774         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
775         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
776         amdgpu_ring_write(ring, 0xDEADBEEF);
777         amdgpu_ring_commit(ring);
778
779         for (i = 0; i < adev->usec_timeout; i++) {
780                 tmp = RREG32(scratch);
781                 if (tmp == 0xDEADBEEF)
782                         break;
783                 DRM_UDELAY(1);
784         }
785         if (i < adev->usec_timeout) {
786                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
787                          ring->idx, i);
788         } else {
789                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
790                           ring->idx, scratch, tmp);
791                 r = -EINVAL;
792         }
793         amdgpu_gfx_scratch_free(adev, scratch);
794         return r;
795 }
796
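/*
 * gfx_v8_0_ring_test_ib - exercise the indirect buffer path
 *
 * Same scratch register handshake as the ring test, except the register
 * write is carried in a small IB submitted via amdgpu_ib_schedule() and
 * completion is detected by waiting on the returned fence.
 */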
797 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
798 {
799         struct amdgpu_device *adev = ring->adev;
800         struct amdgpu_ib ib;
801         struct fence *f = NULL;
802         uint32_t scratch;
803         uint32_t tmp = 0;
804         long r;
805
806         r = amdgpu_gfx_scratch_get(adev, &scratch);
807         if (r) {
808                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
809                 return r;
810         }
811         WREG32(scratch, 0xCAFEDEAD);
812         memset(&ib, 0, sizeof(ib));
813         r = amdgpu_ib_get(adev, NULL, 256, &ib);
814         if (r) {
815                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
816                 goto err1;
817         }
818         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
819         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
820         ib.ptr[2] = 0xDEADBEEF;
821         ib.length_dw = 3;
822
823         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
824         if (r)
825                 goto err2;
826
827         r = fence_wait_timeout(f, false, timeout);
828         if (r == 0) {
829                 DRM_ERROR("amdgpu: IB test timed out.\n");
830                 r = -ETIMEDOUT;
831                 goto err2;
832         } else if (r < 0) {
833                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
834                 goto err2;
835         }
836         tmp = RREG32(scratch);
837         if (tmp == 0xDEADBEEF) {
838                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
839                 r = 0;
840         } else {
841                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
842                           scratch, tmp);
843                 r = -EINVAL;
844         }
845 err2:
846         amdgpu_ib_free(adev, &ib, NULL);
847         fence_put(f);
848 err1:
849         amdgpu_gfx_scratch_free(adev, scratch);
850         return r;
851 }
852
853
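/*
 * gfx_v8_0_free_microcode - drop the firmware references taken in
 * gfx_v8_0_init_microcode().  MEC2 is skipped on Stoney and Topaz, which do
 * not use a second MEC firmware, and the cached RLC register list format is
 * freed.
 */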
854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
855         release_firmware(adev->gfx.pfp_fw);
856         adev->gfx.pfp_fw = NULL;
857         release_firmware(adev->gfx.me_fw);
858         adev->gfx.me_fw = NULL;
859         release_firmware(adev->gfx.ce_fw);
860         adev->gfx.ce_fw = NULL;
861         release_firmware(adev->gfx.rlc_fw);
862         adev->gfx.rlc_fw = NULL;
863         release_firmware(adev->gfx.mec_fw);
864         adev->gfx.mec_fw = NULL;
865         if ((adev->asic_type != CHIP_STONEY) &&
866             (adev->asic_type != CHIP_TOPAZ))
867                 release_firmware(adev->gfx.mec2_fw);
868         adev->gfx.mec2_fw = NULL;
869
870         kfree(adev->gfx.rlc.register_list_format);
871 }
872
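/*
 * gfx_v8_0_init_microcode - request and validate the GFX firmware images
 *
 * Loads amdgpu/<chip>_{pfp,me,ce,rlc,mec,mec2}.bin, records the ucode and
 * feature versions, copies the RLC register list data out of the RLC header,
 * and, when the SMU is responsible for loading firmware (smu_load), fills in
 * the corresponding amdgpu_firmware_info entries.  MEC2 firmware is optional.
 */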
873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
874 {
875         const char *chip_name;
876         char fw_name[30];
877         int err;
878         struct amdgpu_firmware_info *info = NULL;
879         const struct common_firmware_header *header = NULL;
880         const struct gfx_firmware_header_v1_0 *cp_hdr;
881         const struct rlc_firmware_header_v2_0 *rlc_hdr;
882         unsigned int *tmp = NULL, i;
883
884         DRM_DEBUG("\n");
885
886         switch (adev->asic_type) {
887         case CHIP_TOPAZ:
888                 chip_name = "topaz";
889                 break;
890         case CHIP_TONGA:
891                 chip_name = "tonga";
892                 break;
893         case CHIP_CARRIZO:
894                 chip_name = "carrizo";
895                 break;
896         case CHIP_FIJI:
897                 chip_name = "fiji";
898                 break;
899         case CHIP_POLARIS11:
900                 chip_name = "polaris11";
901                 break;
902         case CHIP_POLARIS10:
903                 chip_name = "polaris10";
904                 break;
905         case CHIP_STONEY:
906                 chip_name = "stoney";
907                 break;
908         default:
909                 BUG();
910         }
911
912         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
913         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
914         if (err)
915                 goto out;
916         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
917         if (err)
918                 goto out;
919         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
920         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
924         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.me_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
931         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
935         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
942         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
944
945         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
946         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
947         if (err)
948                 goto out;
949         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
950         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
951         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
952         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
953
954         adev->gfx.rlc.save_and_restore_offset =
955                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
956         adev->gfx.rlc.clear_state_descriptor_offset =
957                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
958         adev->gfx.rlc.avail_scratch_ram_locations =
959                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
960         adev->gfx.rlc.reg_restore_list_size =
961                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
962         adev->gfx.rlc.reg_list_format_start =
963                         le32_to_cpu(rlc_hdr->reg_list_format_start);
964         adev->gfx.rlc.reg_list_format_separate_start =
965                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
966         adev->gfx.rlc.starting_offsets_start =
967                         le32_to_cpu(rlc_hdr->starting_offsets_start);
968         adev->gfx.rlc.reg_list_format_size_bytes =
969                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
970         adev->gfx.rlc.reg_list_size_bytes =
971                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
972
973         adev->gfx.rlc.register_list_format =
974                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
975                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
976
977         if (!adev->gfx.rlc.register_list_format) {
978                 err = -ENOMEM;
979                 goto out;
980         }
981
982         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
984         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
985                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
986
987         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
988
989         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
990                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
991         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
992                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
993
994         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
995         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
996         if (err)
997                 goto out;
998         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
999         if (err)
1000                 goto out;
1001         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1002         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1003         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1004
1005         if ((adev->asic_type != CHIP_STONEY) &&
1006             (adev->asic_type != CHIP_TOPAZ)) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1009                 if (!err) {
1010                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1011                         if (err)
1012                                 goto out;
1013                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1014                                 adev->gfx.mec2_fw->data;
1015                         adev->gfx.mec2_fw_version =
1016                                 le32_to_cpu(cp_hdr->header.ucode_version);
1017                         adev->gfx.mec2_feature_version =
1018                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1019                 } else {
1020                         err = 0;
1021                         adev->gfx.mec2_fw = NULL;
1022                 }
1023         }
1024
1025         if (adev->firmware.smu_load) {
1026                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1027                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1028                 info->fw = adev->gfx.pfp_fw;
1029                 header = (const struct common_firmware_header *)info->fw->data;
1030                 adev->firmware.fw_size +=
1031                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032
1033                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1034                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1035                 info->fw = adev->gfx.me_fw;
1036                 header = (const struct common_firmware_header *)info->fw->data;
1037                 adev->firmware.fw_size +=
1038                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039
1040                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1041                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1042                 info->fw = adev->gfx.ce_fw;
1043                 header = (const struct common_firmware_header *)info->fw->data;
1044                 adev->firmware.fw_size +=
1045                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1048                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1049                 info->fw = adev->gfx.rlc_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1055                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1056                 info->fw = adev->gfx.mec_fw;
1057                 header = (const struct common_firmware_header *)info->fw->data;
1058                 adev->firmware.fw_size +=
1059                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                 if (adev->gfx.mec2_fw) {
1062                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1063                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1064                         info->fw = adev->gfx.mec2_fw;
1065                         header = (const struct common_firmware_header *)info->fw->data;
1066                         adev->firmware.fw_size +=
1067                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1068                 }
1069
1070         }
1071
1072 out:
1073         if (err) {
1074                 dev_err(adev->dev,
1075                         "gfx8: Failed to load firmware \"%s\"\n",
1076                         fw_name);
1077                 release_firmware(adev->gfx.pfp_fw);
1078                 adev->gfx.pfp_fw = NULL;
1079                 release_firmware(adev->gfx.me_fw);
1080                 adev->gfx.me_fw = NULL;
1081                 release_firmware(adev->gfx.ce_fw);
1082                 adev->gfx.ce_fw = NULL;
1083                 release_firmware(adev->gfx.rlc_fw);
1084                 adev->gfx.rlc_fw = NULL;
1085                 release_firmware(adev->gfx.mec_fw);
1086                 adev->gfx.mec_fw = NULL;
1087                 release_firmware(adev->gfx.mec2_fw);
1088                 adev->gfx.mec2_fw = NULL;
1089         }
1090         return err;
1091 }
1092
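/*
 * gfx_v8_0_get_csb_buffer - build the clear state buffer (CSB)
 *
 * Emits the PREAMBLE begin/end markers, a CONTEXT_CONTROL packet, the
 * SECT_CONTEXT register extents from adev->gfx.rlc.cs_data, a per-ASIC
 * PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 pair, and a trailing CLEAR_STATE
 * packet into the supplied buffer.
 */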
1093 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1094                                     volatile u32 *buffer)
1095 {
1096         u32 count = 0, i;
1097         const struct cs_section_def *sect = NULL;
1098         const struct cs_extent_def *ext = NULL;
1099
1100         if (adev->gfx.rlc.cs_data == NULL)
1101                 return;
1102         if (buffer == NULL)
1103                 return;
1104
1105         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1106         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1107
1108         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1109         buffer[count++] = cpu_to_le32(0x80000000);
1110         buffer[count++] = cpu_to_le32(0x80000000);
1111
1112         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1113                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1114                         if (sect->id == SECT_CONTEXT) {
1115                                 buffer[count++] =
1116                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1117                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1118                                                 PACKET3_SET_CONTEXT_REG_START);
1119                                 for (i = 0; i < ext->reg_count; i++)
1120                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1121                         } else {
1122                                 return;
1123                         }
1124                 }
1125         }
1126
1127         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1128         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1129                         PACKET3_SET_CONTEXT_REG_START);
1130         switch (adev->asic_type) {
1131         case CHIP_TONGA:
1132         case CHIP_POLARIS10:
1133                 buffer[count++] = cpu_to_le32(0x16000012);
1134                 buffer[count++] = cpu_to_le32(0x0000002A);
1135                 break;
1136         case CHIP_POLARIS11:
1137                 buffer[count++] = cpu_to_le32(0x16000012);
1138                 buffer[count++] = cpu_to_le32(0x00000000);
1139                 break;
1140         case CHIP_FIJI:
1141                 buffer[count++] = cpu_to_le32(0x3a00161a);
1142                 buffer[count++] = cpu_to_le32(0x0000002e);
1143                 break;
1144         case CHIP_TOPAZ:
1145         case CHIP_CARRIZO:
1146                 buffer[count++] = cpu_to_le32(0x00000002);
1147                 buffer[count++] = cpu_to_le32(0x00000000);
1148                 break;
1149         case CHIP_STONEY:
1150                 buffer[count++] = cpu_to_le32(0x00000000);
1151                 buffer[count++] = cpu_to_le32(0x00000000);
1152                 break;
1153         default:
1154                 buffer[count++] = cpu_to_le32(0x00000000);
1155                 buffer[count++] = cpu_to_le32(0x00000000);
1156                 break;
1157         }
1158
1159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1160         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1161
1162         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1163         buffer[count++] = cpu_to_le32(0);
1164 }
1165
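/*
 * cz_init_cp_jump_table - copy the CP jump tables into the cp_table bo
 *
 * For each micro engine (CE, PFP, ME, MEC and, on Carrizo, MEC2) the jump
 * table is located inside the already-loaded firmware image through the
 * jt_offset/jt_size fields of its header and copied back to back into
 * adev->gfx.rlc.cp_table_ptr.  Only called for Carrizo and Stoney from
 * gfx_v8_0_rlc_init().
 */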
1166 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1167 {
1168         const __le32 *fw_data;
1169         volatile u32 *dst_ptr;
1170         int me, i, max_me = 4;
1171         u32 bo_offset = 0;
1172         u32 table_offset, table_size;
1173
1174         if (adev->asic_type == CHIP_CARRIZO)
1175                 max_me = 5;
1176
1177         /* write the cp table buffer */
1178         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1179         for (me = 0; me < max_me; me++) {
1180                 if (me == 0) {
1181                         const struct gfx_firmware_header_v1_0 *hdr =
1182                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1183                         fw_data = (const __le32 *)
1184                                 (adev->gfx.ce_fw->data +
1185                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1186                         table_offset = le32_to_cpu(hdr->jt_offset);
1187                         table_size = le32_to_cpu(hdr->jt_size);
1188                 } else if (me == 1) {
1189                         const struct gfx_firmware_header_v1_0 *hdr =
1190                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1191                         fw_data = (const __le32 *)
1192                                 (adev->gfx.pfp_fw->data +
1193                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194                         table_offset = le32_to_cpu(hdr->jt_offset);
1195                         table_size = le32_to_cpu(hdr->jt_size);
1196                 } else if (me == 2) {
1197                         const struct gfx_firmware_header_v1_0 *hdr =
1198                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1199                         fw_data = (const __le32 *)
1200                                 (adev->gfx.me_fw->data +
1201                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202                         table_offset = le32_to_cpu(hdr->jt_offset);
1203                         table_size = le32_to_cpu(hdr->jt_size);
1204                 } else if (me == 3) {
1205                         const struct gfx_firmware_header_v1_0 *hdr =
1206                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1207                         fw_data = (const __le32 *)
1208                                 (adev->gfx.mec_fw->data +
1209                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210                         table_offset = le32_to_cpu(hdr->jt_offset);
1211                         table_size = le32_to_cpu(hdr->jt_size);
1212                 } else if (me == 4) {
1213                         const struct gfx_firmware_header_v1_0 *hdr =
1214                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1215                         fw_data = (const __le32 *)
1216                                 (adev->gfx.mec2_fw->data +
1217                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218                         table_offset = le32_to_cpu(hdr->jt_offset);
1219                         table_size = le32_to_cpu(hdr->jt_size);
1220                 }
1221
1222                 for (i = 0; i < table_size; i++) {
1223                         dst_ptr[bo_offset + i] =
1224                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1225                 }
1226
1227                 bo_offset += table_size;
1228         }
1229 }
1230
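/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects
 *
 * Unpins and frees the clear state bo and the CP jump table bo created by
 * gfx_v8_0_rlc_init().  Both pointers are NULL checked, so this is safe to
 * call on a partially initialized state.
 */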
1231 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1232 {
1233         int r;
1234
1235         /* clear state block */
1236         if (adev->gfx.rlc.clear_state_obj) {
1237                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1238                 if (unlikely(r != 0))
1239                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1240                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1241                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1242                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1243                 adev->gfx.rlc.clear_state_obj = NULL;
1244         }
1245
1246         /* jump table block */
1247         if (adev->gfx.rlc.cp_table_obj) {
1248                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1249                 if (unlikely(r != 0))
1250                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1251                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1252                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1253                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1254                 adev->gfx.rlc.cp_table_obj = NULL;
1255         }
1256 }
1257
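/*
 * gfx_v8_0_rlc_init - allocate and fill the RLC buffer objects
 *
 * Creates a CPU accessible VRAM bo for the clear state buffer, pins and maps
 * it and fills it through gfx_v8_0_get_csb_buffer().  On Carrizo and Stoney
 * an additional cp_table bo (jump tables plus 64KB for GDS backup, see the
 * size calculation below) is created and populated by cz_init_cp_jump_table().
 */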
1258 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1259 {
1260         volatile u32 *dst_ptr;
1261         u32 dws;
1262         const struct cs_section_def *cs_data;
1263         int r;
1264
1265         adev->gfx.rlc.cs_data = vi_cs_data;
1266
1267         cs_data = adev->gfx.rlc.cs_data;
1268
1269         if (cs_data) {
1270                 /* clear state block */
1271                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1272
1273                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1274                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1275                                              AMDGPU_GEM_DOMAIN_VRAM,
1276                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1277                                              NULL, NULL,
1278                                              &adev->gfx.rlc.clear_state_obj);
1279                         if (r) {
1280                                 dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
1281                                 gfx_v8_0_rlc_fini(adev);
1282                                 return r;
1283                         }
1284                 }
1285                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286                 if (unlikely(r != 0)) {
1287                         gfx_v8_0_rlc_fini(adev);
1288                         return r;
1289                 }
1290                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291                                   &adev->gfx.rlc.clear_state_gpu_addr);
1292                 if (r) {
1293                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295                         gfx_v8_0_rlc_fini(adev);
1296                         return r;
1297                 }
1298
1299                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300                 if (r) {
1301                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302                         gfx_v8_0_rlc_fini(adev);
1303                         return r;
1304                 }
1305                 /* set up the cs buffer */
1306                 dst_ptr = adev->gfx.rlc.cs_ptr;
1307                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317                                              AMDGPU_GEM_DOMAIN_VRAM,
1318                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1319                                              NULL, NULL,
1320                                              &adev->gfx.rlc.cp_table_obj);
1321                         if (r) {
1322                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1323                                 return r;
1324                         }
1325                 }
1326
1327                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1328                 if (unlikely(r != 0)) {
1329                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1330                         return r;
1331                 }
1332                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1333                                   &adev->gfx.rlc.cp_table_gpu_addr);
1334                 if (r) {
1335                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1336                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1340                 if (r) {
1341                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1342                         return r;
1343                 }
1344
1345                 cz_init_cp_jump_table(adev);
1346
1347                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1348                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1349         }
1350
1351         return 0;
1352 }
1353
1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1355 {
1356         int r;
1357
1358         if (adev->gfx.mec.hpd_eop_obj) {
1359                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1360                 if (unlikely(r != 0))
1361                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1362                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1363                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1365                 adev->gfx.mec.hpd_eop_obj = NULL;
1366         }
1367 }
1368
1369 #define MEC_HPD_SIZE 2048
1370
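/*
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer
 *
 * A single GTT bo of num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes backs the
 * EOP buffers for the one MEC pipe this driver manages (the remaining pipes
 * are left to the KFD).  The bo is pinned, CPU mapped, zeroed and unmapped
 * again before returning.
 */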
1371 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1372 {
1373         int r;
1374         u32 *hpd;
1375
1376         /*
1377          * we assign only 1 pipe because all other pipes will
1378          * be handled by KFD
1379          */
1380         adev->gfx.mec.num_mec = 1;
1381         adev->gfx.mec.num_pipe = 1;
1382         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1383
1384         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1385                 r = amdgpu_bo_create(adev,
1386                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1387                                      PAGE_SIZE, true,
1388                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1389                                      &adev->gfx.mec.hpd_eop_obj);
1390                 if (r) {
1391                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1392                         return r;
1393                 }
1394         }
1395
1396         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1397         if (unlikely(r != 0)) {
1398                 gfx_v8_0_mec_fini(adev);
1399                 return r;
1400         }
1401         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1402                           &adev->gfx.mec.hpd_eop_gpu_addr);
1403         if (r) {
1404                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1405                 gfx_v8_0_mec_fini(adev);
1406                 return r;
1407         }
1408         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1409         if (r) {
1410                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1411                 gfx_v8_0_mec_fini(adev);
1412                 return r;
1413         }
1414
1415         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1416
1417         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1418         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1419
1420         return 0;
1421 }
1422
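/*
 * Hand-assembled GCN compute shader used by the EDC GPR workaround below.
 * The dwords appear to be a long run of v_mov_b32 instructions that write
 * the wave's VGPRs from SGPR-held init values, terminated by s_barrier
 * (0xbf8a0000) and s_endpgm (0xbf810000).
 */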
1423 static const u32 vgpr_init_compute_shader[] =
1424 {
1425         0x7e000209, 0x7e020208,
1426         0x7e040207, 0x7e060206,
1427         0x7e080205, 0x7e0a0204,
1428         0x7e0c0203, 0x7e0e0202,
1429         0x7e100201, 0x7e120200,
1430         0x7e140209, 0x7e160208,
1431         0x7e180207, 0x7e1a0206,
1432         0x7e1c0205, 0x7e1e0204,
1433         0x7e200203, 0x7e220202,
1434         0x7e240201, 0x7e260200,
1435         0x7e280209, 0x7e2a0208,
1436         0x7e2c0207, 0x7e2e0206,
1437         0x7e300205, 0x7e320204,
1438         0x7e340203, 0x7e360202,
1439         0x7e380201, 0x7e3a0200,
1440         0x7e3c0209, 0x7e3e0208,
1441         0x7e400207, 0x7e420206,
1442         0x7e440205, 0x7e460204,
1443         0x7e480203, 0x7e4a0202,
1444         0x7e4c0201, 0x7e4e0200,
1445         0x7e500209, 0x7e520208,
1446         0x7e540207, 0x7e560206,
1447         0x7e580205, 0x7e5a0204,
1448         0x7e5c0203, 0x7e5e0202,
1449         0x7e600201, 0x7e620200,
1450         0x7e640209, 0x7e660208,
1451         0x7e680207, 0x7e6a0206,
1452         0x7e6c0205, 0x7e6e0204,
1453         0x7e700203, 0x7e720202,
1454         0x7e740201, 0x7e760200,
1455         0x7e780209, 0x7e7a0208,
1456         0x7e7c0207, 0x7e7e0206,
1457         0xbf8a0000, 0xbf810000,
1458 };
1459
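/*
 * Companion shader for the two SGPR passes; a series of scalar moves that
 * touch the SGPR file, ending with the same s_barrier/s_endpgm tail.
 */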
1460 static const u32 sgpr_init_compute_shader[] =
1461 {
1462         0xbe8a0100, 0xbe8c0102,
1463         0xbe8e0104, 0xbe900106,
1464         0xbe920108, 0xbe940100,
1465         0xbe960102, 0xbe980104,
1466         0xbe9a0106, 0xbe9c0108,
1467         0xbe9e0100, 0xbea00102,
1468         0xbea20104, 0xbea40106,
1469         0xbea60108, 0xbea80100,
1470         0xbeaa0102, 0xbeac0104,
1471         0xbeae0106, 0xbeb00108,
1472         0xbeb20100, 0xbeb40102,
1473         0xbeb60104, 0xbeb80106,
1474         0xbeba0108, 0xbebc0100,
1475         0xbebe0102, 0xbec00104,
1476         0xbec20106, 0xbec40108,
1477         0xbec60100, 0xbec80102,
1478         0xbee60004, 0xbee70005,
1479         0xbeea0006, 0xbeeb0007,
1480         0xbee80008, 0xbee90009,
1481         0xbefc0000, 0xbf8a0000,
1482         0xbf810000, 0x00000000,
1483 };
1484
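/*
 * Register/value pairs written with SET_SH_REG before each workaround
 * dispatch.  vgpr_init_regs enables every CU on SE0 and uses 1024-thread
 * workgroups for the VGPR pass; sgpr1_init_regs and sgpr2_init_regs below
 * differ only in the CU mask (0x0f vs 0xf0), splitting the CUs between the
 * two SGPR passes, and use 1280-thread workgroups.
 */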
1485 static const u32 vgpr_init_regs[] =
1486 {
1487         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1488         mmCOMPUTE_RESOURCE_LIMITS, 0,
1489         mmCOMPUTE_NUM_THREAD_X, 256*4,
1490         mmCOMPUTE_NUM_THREAD_Y, 1,
1491         mmCOMPUTE_NUM_THREAD_Z, 1,
1492         mmCOMPUTE_PGM_RSRC2, 20,
1493         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1494         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1495         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1496         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1497         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1498         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1499         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1500         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1501         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1502         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1503 };
1504
1505 static const u32 sgpr1_init_regs[] =
1506 {
1507         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1508         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1509         mmCOMPUTE_NUM_THREAD_X, 256*5,
1510         mmCOMPUTE_NUM_THREAD_Y, 1,
1511         mmCOMPUTE_NUM_THREAD_Z, 1,
1512         mmCOMPUTE_PGM_RSRC2, 20,
1513         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1514         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1515         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1516         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1517         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1518         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1519         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1520         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1521         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1522         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1523 };
1524
1525 static const u32 sgpr2_init_regs[] =
1526 {
1527         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1528         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1529         mmCOMPUTE_NUM_THREAD_X, 256*5,
1530         mmCOMPUTE_NUM_THREAD_Y, 1,
1531         mmCOMPUTE_NUM_THREAD_Z, 1,
1532         mmCOMPUTE_PGM_RSRC2, 20,
1533         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1534         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1535         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1536         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1537         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1538         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1539         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1540         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1541         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1542         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1543 };
1544
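/*
 * EDC single/double error counter registers.  They are only read back at the
 * end of the workaround, for the clear-on-read side effect noted there.
 */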
1545 static const u32 sec_ded_counter_registers[] =
1546 {
1547         mmCPC_EDC_ATC_CNT,
1548         mmCPC_EDC_SCRATCH_CNT,
1549         mmCPC_EDC_UCODE_CNT,
1550         mmCPF_EDC_ATC_CNT,
1551         mmCPF_EDC_ROQ_CNT,
1552         mmCPF_EDC_TAG_CNT,
1553         mmCPG_EDC_ATC_CNT,
1554         mmCPG_EDC_DMA_CNT,
1555         mmCPG_EDC_TAG_CNT,
1556         mmDC_EDC_CSINVOC_CNT,
1557         mmDC_EDC_RESTORE_CNT,
1558         mmDC_EDC_STATE_CNT,
1559         mmGDS_EDC_CNT,
1560         mmGDS_EDC_GRBM_CNT,
1561         mmGDS_EDC_OA_DED,
1562         mmSPI_EDC_CNT,
1563         mmSQC_ATC_EDC_GATCL1_CNT,
1564         mmSQC_EDC_CNT,
1565         mmSQ_EDC_DED_CNT,
1566         mmSQ_EDC_INFO,
1567         mmSQ_EDC_SEC_CNT,
1568         mmTCC_EDC_CNT,
1569         mmTCP_ATC_EDC_GATCL1_CNT,
1570         mmTCP_EDC_CNT,
1571         mmTD_EDC_CNT
1572 };
1573
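/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize the GPRs for EDC on Carrizo
 *
 * Builds a single indirect buffer containing three compute dispatches (one
 * VGPR pass and two SGPR passes using the shaders and register tables above),
 * submits it on the first compute ring and waits for its fence.  EDC is then
 * re-enabled via GB_EDC_MODE/CC_GC_EDC_CONFIG and the SEC/DED counters are
 * cleared by reading them back.
 */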
1574 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1575 {
1576         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1577         struct amdgpu_ib ib;
1578         struct fence *f = NULL;
1579         int r, i;
1580         u32 tmp;
1581         unsigned total_size, vgpr_offset, sgpr_offset;
1582         u64 gpu_addr;
1583
1584         /* only supported on CZ */
1585         if (adev->asic_type != CHIP_CARRIZO)
1586                 return 0;
1587
1588         /* bail if the compute ring is not ready */
1589         if (!ring->ready)
1590                 return 0;
1591
1592         tmp = RREG32(mmGB_EDC_MODE);
1593         WREG32(mmGB_EDC_MODE, 0);
1594
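        /*
         * IB sizing: each register pair costs 3 dwords (SET_SH_REG header,
         * offset, value); per pass add 4 dwords for the COMPUTE_PGM_LO/HI
         * write, 5 for DISPATCH_DIRECT and 2 for the CS partial flush
         * EVENT_WRITE.  The result is converted to bytes (* 4) and the two
         * shader blobs are appended after 256 byte alignment.
         */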
1595         total_size =
1596                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1597         total_size +=
1598                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1599         total_size +=
1600                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1601         total_size = ALIGN(total_size, 256);
1602         vgpr_offset = total_size;
1603         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1604         sgpr_offset = total_size;
1605         total_size += sizeof(sgpr_init_compute_shader);
1606
1607         /* allocate an indirect buffer to put the commands in */
1608         memset(&ib, 0, sizeof(ib));
1609         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1610         if (r) {
1611                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1612                 return r;
1613         }
1614
1615         /* load the compute shaders */
1616         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1617                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1618
1619         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1620                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1621
1622         /* init the ib length to 0 */
1623         ib.length_dw = 0;
1624
1625         /* VGPR */
1626         /* write the register state for the compute dispatch */
1627         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1628                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1630                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1631         }
1632         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1634         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638
1639         /* write dispatch packet */
1640         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641         ib.ptr[ib.length_dw++] = 8; /* x */
1642         ib.ptr[ib.length_dw++] = 1; /* y */
1643         ib.ptr[ib.length_dw++] = 1; /* z */
1644         ib.ptr[ib.length_dw++] =
1645                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646
1647         /* write CS partial flush packet */
1648         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650
1651         /* SGPR1 */
1652         /* write the register state for the compute dispatch */
1653         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1654                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1656                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1657         }
1658         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665         /* write dispatch packet */
1666         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667         ib.ptr[ib.length_dw++] = 8; /* x */
1668         ib.ptr[ib.length_dw++] = 1; /* y */
1669         ib.ptr[ib.length_dw++] = 1; /* z */
1670         ib.ptr[ib.length_dw++] =
1671                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673         /* write CS partial flush packet */
1674         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677         /* SGPR2 */
1678         /* write the register state for the compute dispatch */
1679         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1680                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1682                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1683         }
1684         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690
1691         /* write dispatch packet */
1692         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693         ib.ptr[ib.length_dw++] = 8; /* x */
1694         ib.ptr[ib.length_dw++] = 1; /* y */
1695         ib.ptr[ib.length_dw++] = 1; /* z */
1696         ib.ptr[ib.length_dw++] =
1697                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698
1699         /* write CS partial flush packet */
1700         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702
1703         /* schedule the ib on the ring */
1704         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1705         if (r) {
1706                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1707                 goto fail;
1708         }
1709
1710         /* wait for the GPU to finish processing the IB */
1711         r = fence_wait(f, false);
1712         if (r) {
1713                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1714                 goto fail;
1715         }
1716
1717         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1718         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1719         WREG32(mmGB_EDC_MODE, tmp);
1720
1721         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1722         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1723         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1724
1725
1726         /* read back registers to clear the counters */
1727         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1728                 RREG32(sec_ded_counter_registers[i]);
1729
1730 fail:
1731         amdgpu_ib_free(adev, &ib, NULL);
1732         fence_put(f);
1733
1734         return r;
1735 }
1736
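/*
 * gfx_v8_0_gpu_early_init - fill adev->gfx.config from the ASIC type
 *
 * Sets the shader engine/CU/backend limits per chip (queried from the vbios
 * on Polaris), derives the memory row size from the DIMM address mapping
 * fuses on APUs or from MC_ARB_RAMCFG on dGPUs, and patches the ROW_SIZE
 * field into the golden GB_ADDR_CONFIG value for the chip.
 */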
1737 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1738 {
1739         u32 gb_addr_config;
1740         u32 mc_shared_chmap, mc_arb_ramcfg;
1741         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1742         u32 tmp;
1743         int ret;
1744
1745         switch (adev->asic_type) {
1746         case CHIP_TOPAZ:
1747                 adev->gfx.config.max_shader_engines = 1;
1748                 adev->gfx.config.max_tile_pipes = 2;
1749                 adev->gfx.config.max_cu_per_sh = 6;
1750                 adev->gfx.config.max_sh_per_se = 1;
1751                 adev->gfx.config.max_backends_per_se = 2;
1752                 adev->gfx.config.max_texture_channel_caches = 2;
1753                 adev->gfx.config.max_gprs = 256;
1754                 adev->gfx.config.max_gs_threads = 32;
1755                 adev->gfx.config.max_hw_contexts = 8;
1756
1757                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1758                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1759                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1760                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1761                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1762                 break;
1763         case CHIP_FIJI:
1764                 adev->gfx.config.max_shader_engines = 4;
1765                 adev->gfx.config.max_tile_pipes = 16;
1766                 adev->gfx.config.max_cu_per_sh = 16;
1767                 adev->gfx.config.max_sh_per_se = 1;
1768                 adev->gfx.config.max_backends_per_se = 4;
1769                 adev->gfx.config.max_texture_channel_caches = 16;
1770                 adev->gfx.config.max_gprs = 256;
1771                 adev->gfx.config.max_gs_threads = 32;
1772                 adev->gfx.config.max_hw_contexts = 8;
1773
1774                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1775                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1776                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1777                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1778                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1779                 break;
1780         case CHIP_POLARIS11:
1781                 ret = amdgpu_atombios_get_gfx_info(adev);
1782                 if (ret)
1783                         return ret;
1784                 adev->gfx.config.max_gprs = 256;
1785                 adev->gfx.config.max_gs_threads = 32;
1786                 adev->gfx.config.max_hw_contexts = 8;
1787
1788                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1789                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1790                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1791                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1792                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1793                 break;
1794         case CHIP_POLARIS10:
1795                 ret = amdgpu_atombios_get_gfx_info(adev);
1796                 if (ret)
1797                         return ret;
1798                 adev->gfx.config.max_gprs = 256;
1799                 adev->gfx.config.max_gs_threads = 32;
1800                 adev->gfx.config.max_hw_contexts = 8;
1801
1802                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1803                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1804                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1805                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1806                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1807                 break;
1808         case CHIP_TONGA:
1809                 adev->gfx.config.max_shader_engines = 4;
1810                 adev->gfx.config.max_tile_pipes = 8;
1811                 adev->gfx.config.max_cu_per_sh = 8;
1812                 adev->gfx.config.max_sh_per_se = 1;
1813                 adev->gfx.config.max_backends_per_se = 2;
1814                 adev->gfx.config.max_texture_channel_caches = 8;
1815                 adev->gfx.config.max_gprs = 256;
1816                 adev->gfx.config.max_gs_threads = 32;
1817                 adev->gfx.config.max_hw_contexts = 8;
1818
1819                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1820                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1821                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1822                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1823                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1824                 break;
1825         case CHIP_CARRIZO:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_sh_per_se = 1;
1829                 adev->gfx.config.max_backends_per_se = 2;
1830
1831                 switch (adev->pdev->revision) {
1832                 case 0xc4:
1833                 case 0x84:
1834                 case 0xc8:
1835                 case 0xcc:
1836                 case 0xe1:
1837                 case 0xe3:
1838                         /* B10 */
1839                         adev->gfx.config.max_cu_per_sh = 8;
1840                         break;
1841                 case 0xc5:
1842                 case 0x81:
1843                 case 0x85:
1844                 case 0xc9:
1845                 case 0xcd:
1846                 case 0xe2:
1847                 case 0xe4:
1848                         /* B8 */
1849                         adev->gfx.config.max_cu_per_sh = 6;
1850                         break;
1851                 case 0xc6:
1852                 case 0xca:
1853                 case 0xce:
1854                 case 0x88:
1855                         /* B6 */
1856                         adev->gfx.config.max_cu_per_sh = 6;
1857                         break;
1858                 case 0xc7:
1859                 case 0x87:
1860                 case 0xcb:
1861                 case 0xe5:
1862                 case 0x89:
1863                 default:
1864                         /* B4 */
1865                         adev->gfx.config.max_cu_per_sh = 4;
1866                         break;
1867                 }
1868
1869                 adev->gfx.config.max_texture_channel_caches = 2;
1870                 adev->gfx.config.max_gprs = 256;
1871                 adev->gfx.config.max_gs_threads = 32;
1872                 adev->gfx.config.max_hw_contexts = 8;
1873
1874                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1878                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1879                 break;
1880         case CHIP_STONEY:
1881                 adev->gfx.config.max_shader_engines = 1;
1882                 adev->gfx.config.max_tile_pipes = 2;
1883                 adev->gfx.config.max_sh_per_se = 1;
1884                 adev->gfx.config.max_backends_per_se = 1;
1885
1886                 switch (adev->pdev->revision) {
1887                 case 0xc0:
1888                 case 0xc1:
1889                 case 0xc2:
1890                 case 0xc4:
1891                 case 0xc8:
1892                 case 0xc9:
1893                         adev->gfx.config.max_cu_per_sh = 3;
1894                         break;
1895                 case 0xd0:
1896                 case 0xd1:
1897                 case 0xd2:
1898                 default:
1899                         adev->gfx.config.max_cu_per_sh = 2;
1900                         break;
1901                 }
1902
1903                 adev->gfx.config.max_texture_channel_caches = 2;
1904                 adev->gfx.config.max_gprs = 256;
1905                 adev->gfx.config.max_gs_threads = 16;
1906                 adev->gfx.config.max_hw_contexts = 8;
1907
1908                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1909                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1910                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1911                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1912                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1913                 break;
1914         default:
1915                 adev->gfx.config.max_shader_engines = 2;
1916                 adev->gfx.config.max_tile_pipes = 4;
1917                 adev->gfx.config.max_cu_per_sh = 2;
1918                 adev->gfx.config.max_sh_per_se = 1;
1919                 adev->gfx.config.max_backends_per_se = 2;
1920                 adev->gfx.config.max_texture_channel_caches = 4;
1921                 adev->gfx.config.max_gprs = 256;
1922                 adev->gfx.config.max_gs_threads = 32;
1923                 adev->gfx.config.max_hw_contexts = 8;
1924
1925                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1926                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1927                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1928                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1929                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1930                 break;
1931         }
1932
1933         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1934         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1935         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1936
1937         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1938         adev->gfx.config.mem_max_burst_length_bytes = 256;
1939         if (adev->flags & AMD_IS_APU) {
1940                 /* Get memory bank mapping mode. */
1941                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1942                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1943                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1944
1945                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1946                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1947                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1948
1949                 /* Validate settings in case only one DIMM is installed. */
1950                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1951                         dimm00_addr_map = 0;
1952                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1953                         dimm01_addr_map = 0;
1954                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1955                         dimm10_addr_map = 0;
1956                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1957                         dimm11_addr_map = 0;
1958
1959                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1960                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1961                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1962                         adev->gfx.config.mem_row_size_in_kb = 2;
1963                 else
1964                         adev->gfx.config.mem_row_size_in_kb = 1;
1965         } else {
1966                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1967                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1968                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1969                         adev->gfx.config.mem_row_size_in_kb = 4;
1970         }
1971
1972         adev->gfx.config.shader_engine_tile_size = 32;
1973         adev->gfx.config.num_gpus = 1;
1974         adev->gfx.config.multi_gpu_tile_size = 64;
1975
1976         /* fix up row size */
1977         switch (adev->gfx.config.mem_row_size_in_kb) {
1978         case 1:
1979         default:
1980                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1981                 break;
1982         case 2:
1983                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1984                 break;
1985         case 4:
1986                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1987                 break;
1988         }
1989         adev->gfx.config.gb_addr_config = gb_addr_config;
1990
1991         return 0;
1992 }
1993
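/*
 * gfx_v8_0_sw_init - software init for the GFX8 IP block
 *
 * Registers the EOP and privileged reg/inst interrupt sources, loads the CP
 * and RLC microcode, creates the RLC and MEC buffer objects, initializes the
 * gfx and compute rings, reserves the GDS/GWS/OA partitions and finally runs
 * gfx_v8_0_gpu_early_init().
 */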
1994 static int gfx_v8_0_sw_init(void *handle)
1995 {
1996         int i, r;
1997         struct amdgpu_ring *ring;
1998         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1999
2000         /* EOP Event */
2001         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2002         if (r)
2003                 return r;
2004
2005         /* Privileged reg */
2006         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2007         if (r)
2008                 return r;
2009
2010         /* Privileged inst */
2011         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2012         if (r)
2013                 return r;
2014
2015         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2016
2017         gfx_v8_0_scratch_init(adev);
2018
2019         r = gfx_v8_0_init_microcode(adev);
2020         if (r) {
2021                 DRM_ERROR("Failed to load gfx firmware!\n");
2022                 return r;
2023         }
2024
2025         r = gfx_v8_0_rlc_init(adev);
2026         if (r) {
2027                 DRM_ERROR("Failed to init rlc BOs!\n");
2028                 return r;
2029         }
2030
2031         r = gfx_v8_0_mec_init(adev);
2032         if (r) {
2033                 DRM_ERROR("Failed to init MEC BOs!\n");
2034                 return r;
2035         }
2036
2037         /* set up the gfx ring */
2038         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2039                 ring = &adev->gfx.gfx_ring[i];
2040                 ring->ring_obj = NULL;
2041                 sprintf(ring->name, "gfx");
2042                 /* no gfx doorbells on iceland */
2043                 if (adev->asic_type != CHIP_TOPAZ) {
2044                         ring->use_doorbell = true;
2045                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2046                 }
2047
2048                 r = amdgpu_ring_init(adev, ring, 1024,
2049                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2050                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2051                                      AMDGPU_RING_TYPE_GFX);
2052                 if (r)
2053                         return r;
2054         }
2055
2056         /* set up the compute queues */
2057         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2058                 unsigned irq_type;
2059
2060                 /* max 32 queues per MEC */
2061                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2062                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2063                         break;
2064                 }
2065                 ring = &adev->gfx.compute_ring[i];
2066                 ring->ring_obj = NULL;
2067                 ring->use_doorbell = true;
2068                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2069                 ring->me = 1; /* first MEC */
2070                 ring->pipe = i / 8;
2071                 ring->queue = i % 8;
2072                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2073                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2074                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2075                 r = amdgpu_ring_init(adev, ring, 1024,
2076                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2077                                      &adev->gfx.eop_irq, irq_type,
2078                                      AMDGPU_RING_TYPE_COMPUTE);
2079                 if (r)
2080                         return r;
2081         }
2082
2083         /* reserve GDS, GWS and OA resources for gfx */
2084         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2085                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2086                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2087         if (r)
2088                 return r;
2089
2090         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2091                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2092                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2093         if (r)
2094                 return r;
2095
2096         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2097                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2098                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2099         if (r)
2100                 return r;
2101
2102         adev->gfx.ce_ram_size = 0x8000;
2103
2104         r = gfx_v8_0_gpu_early_init(adev);
2105         if (r)
2106                 return r;
2107
2108         return 0;
2109 }
2110
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113         int i;
2114         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115
2116         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2117         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2118         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2119
2120         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124
2125         gfx_v8_0_mec_fini(adev);
2126         gfx_v8_0_rlc_fini(adev);
2127         gfx_v8_0_free_microcode(adev);
2128
2129         return 0;
2130 }
2131
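/*
 * gfx_v8_0_tiling_mode_table_init - program the tiling mode tables
 *
 * Fills adev->gfx.config.tile_mode_array and macrotile_mode_array with the
 * per-ASIC GB_TILE_MODEn/GB_MACROTILE_MODEn encodings and writes them to the
 * hardware, leaving the few indices each ASIC does not program untouched.
 */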
2132 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2133 {
2134         uint32_t *modearray, *mod2array;
2135         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2136         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2137         u32 reg_offset;
2138
2139         modearray = adev->gfx.config.tile_mode_array;
2140         mod2array = adev->gfx.config.macrotile_mode_array;
2141
2142         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2143                 modearray[reg_offset] = 0;
2144
2145         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2146                 mod2array[reg_offset] = 0;
2147
2148         switch (adev->asic_type) {
2149         case CHIP_TOPAZ:
2150                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                 PIPE_CONFIG(ADDR_SURF_P2) |
2152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                 PIPE_CONFIG(ADDR_SURF_P2) |
2156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159                                 PIPE_CONFIG(ADDR_SURF_P2) |
2160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2163                                 PIPE_CONFIG(ADDR_SURF_P2) |
2164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2166                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167                                 PIPE_CONFIG(ADDR_SURF_P2) |
2168                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2170                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2171                                 PIPE_CONFIG(ADDR_SURF_P2) |
2172                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                 PIPE_CONFIG(ADDR_SURF_P2) |
2176                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2178                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2));
2180                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2183                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2208                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2228                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2236                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                  PIPE_CONFIG(ADDR_SURF_P2) |
2246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249                                  PIPE_CONFIG(ADDR_SURF_P2) |
2250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2252
2253                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                 NUM_BANKS(ADDR_SURF_8_BANK));
2257                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260                                 NUM_BANKS(ADDR_SURF_8_BANK));
2261                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                 NUM_BANKS(ADDR_SURF_8_BANK));
2265                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                 NUM_BANKS(ADDR_SURF_8_BANK));
2269                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2272                                 NUM_BANKS(ADDR_SURF_8_BANK));
2273                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                 NUM_BANKS(ADDR_SURF_8_BANK));
2277                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2280                                 NUM_BANKS(ADDR_SURF_8_BANK));
2281                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                 NUM_BANKS(ADDR_SURF_16_BANK));
2285                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                 NUM_BANKS(ADDR_SURF_16_BANK));
2289                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2290                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2291                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2292                                  NUM_BANKS(ADDR_SURF_16_BANK));
2293                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2294                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2295                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2296                                  NUM_BANKS(ADDR_SURF_16_BANK));
2297                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2299                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300                                  NUM_BANKS(ADDR_SURF_16_BANK));
2301                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2304                                  NUM_BANKS(ADDR_SURF_16_BANK));
2305                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2307                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308                                  NUM_BANKS(ADDR_SURF_8_BANK));
2309
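                /* Write the tables to the GB_TILE_MODEn / GB_MACROTILE_MODEn
                 * registers; tile mode entries 7, 12, 17 and 23 and macrotile
                 * entry 7 are skipped here.
                 */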
2310                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2311                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2312                             reg_offset != 23)
2313                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2314
2315                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2316                         if (reg_offset != 7)
2317                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2318
2319                 break;
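        /* Fiji: tiling tables built on the ADDR_SURF_P16_32x32_16x16 pipe
         * config (ADDR_SURF_P4_16x16 for some PRT entries).
         */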
2320         case CHIP_FIJI:
2321                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2332                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2336                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2340                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2341                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2344                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2345                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2346                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2348                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2351                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2352                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2353                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2354                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2355                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2367                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2372                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2376                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2387                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2391                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2396                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2404                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2408                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2412                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2415                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2416                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2419                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2420                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2423                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2424                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2427                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443
2444                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                 NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                                 NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                                 NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459                                 NUM_BANKS(ADDR_SURF_8_BANK));
2460                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                                 NUM_BANKS(ADDR_SURF_8_BANK));
2464                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467                                 NUM_BANKS(ADDR_SURF_8_BANK));
2468                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                 NUM_BANKS(ADDR_SURF_8_BANK));
2472                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2474                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475                                 NUM_BANKS(ADDR_SURF_8_BANK));
2476                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                                 NUM_BANKS(ADDR_SURF_8_BANK));
2480                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                  NUM_BANKS(ADDR_SURF_8_BANK));
2484                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                                  NUM_BANKS(ADDR_SURF_8_BANK));
2488                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491                                  NUM_BANKS(ADDR_SURF_8_BANK));
2492                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                  NUM_BANKS(ADDR_SURF_8_BANK));
2496                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                                  NUM_BANKS(ADDR_SURF_4_BANK));
2500
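                /* Write every GB_TILE_MODEn register; only macrotile entry 7
                 * is skipped.
                 */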
2501                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2502                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2503
2504                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505                         if (reg_offset != 7)
2506                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2507
2508                 break;
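        /* Tonga: same table layout as Fiji, but using the
         * ADDR_SURF_P8_32x32_16x16 pipe config.
         */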
2509         case CHIP_TONGA:
2510                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2513                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2517                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2521                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2522                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2523                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2525                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2526                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2529                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2530                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2533                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2534                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2537                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2538                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2541                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2542                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2543                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2544                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2556                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2558                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2563                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2569                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2571                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2573                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2575                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2576                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2577                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2578                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2579                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2580                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2581                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2583                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2585                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2589                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2591                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2593                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2595                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2596                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2597                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2599                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2600                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2601                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2603                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2604                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2605                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2607                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2608                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2609                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2611                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2612                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2613                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2615                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2616                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2617                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2619                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2623                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2629                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2631                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2632
2633                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2635                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2636                                 NUM_BANKS(ADDR_SURF_16_BANK));
2637                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2640                                 NUM_BANKS(ADDR_SURF_16_BANK));
2641                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2643                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2644                                 NUM_BANKS(ADDR_SURF_16_BANK));
2645                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2647                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2648                                 NUM_BANKS(ADDR_SURF_16_BANK));
2649                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2651                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2652                                 NUM_BANKS(ADDR_SURF_16_BANK));
2653                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2656                                 NUM_BANKS(ADDR_SURF_16_BANK));
2657                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2660                                 NUM_BANKS(ADDR_SURF_16_BANK));
2661                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2663                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2664                                 NUM_BANKS(ADDR_SURF_16_BANK));
2665                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2667                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2668                                 NUM_BANKS(ADDR_SURF_16_BANK));
2669                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2671                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2672                                  NUM_BANKS(ADDR_SURF_16_BANK));
2673                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2680                                  NUM_BANKS(ADDR_SURF_8_BANK));
2681                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2683                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2684                                  NUM_BANKS(ADDR_SURF_4_BANK));
2685                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2688                                  NUM_BANKS(ADDR_SURF_4_BANK));
2689
2690                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2691                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2692
2693                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2694                         if (reg_offset != 7)
2695                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2696
2697                 break;
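        /* Polaris11: tiling tables built entirely on the ADDR_SURF_P4_16x16
         * pipe config.
         */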
2698         case CHIP_POLARIS11:
2699                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2710                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2711                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2714                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2715                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2718                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2719                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2722                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2723                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2726                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2727                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2733                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2736                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2740                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2744                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2745                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2748                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2750                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2752                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2756                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2757                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2758                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2760                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2761                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2764                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2765                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2766                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2768                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2769                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2770                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2773                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2774                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2776                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2778                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2782                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2784                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2785                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2786                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2788                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2789                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2790                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2792                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2793                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2796                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2797                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2798                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2800                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2802                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2817                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2821
2822                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856
2857                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861
2862                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2863                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865                                 NUM_BANKS(ADDR_SURF_16_BANK));
2866
2867                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2869                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870                                 NUM_BANKS(ADDR_SURF_16_BANK));
2871
2872                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876
2877                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880                                 NUM_BANKS(ADDR_SURF_16_BANK));
2881
2882                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2885                                 NUM_BANKS(ADDR_SURF_8_BANK));
2886
2887                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2889                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2890                                 NUM_BANKS(ADDR_SURF_4_BANK));
2891
2892                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2893                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2894
2895                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2896                         if (reg_offset != 7)
2897                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2898
2899                 break;
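        /* Polaris10: ADDR_SURF_P8_32x32_16x16 pipe config, with
         * ADDR_SURF_P4_16x16 for some PRT entries.
         */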
2900         case CHIP_POLARIS10:
2901                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2912                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2916                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2920                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2935                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2967                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2996                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3002                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3010                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3019                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023
3024                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_16_BANK));
3053
3054                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_16_BANK));
3058
3059                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062                                 NUM_BANKS(ADDR_SURF_16_BANK));
3063
3064                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                 NUM_BANKS(ADDR_SURF_16_BANK));
3068
3069                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072                                 NUM_BANKS(ADDR_SURF_16_BANK));
3073
3074                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077                                 NUM_BANKS(ADDR_SURF_16_BANK));
3078
3079                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082                                 NUM_BANKS(ADDR_SURF_8_BANK));
3083
3084                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087                                 NUM_BANKS(ADDR_SURF_4_BANK));
3088
3089                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3092                                 NUM_BANKS(ADDR_SURF_4_BANK));
3093
3094                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3095                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3096
3097                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3098                         if (reg_offset != 7)
3099                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3100
3101                 break;
3102         case CHIP_STONEY:
3103                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3106                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P2) |
3109                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3110                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P2) |
3113                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P2) |
3117                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3118                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2));
3133                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2) |
3135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3136                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138                                  PIPE_CONFIG(ADDR_SURF_P2) |
3139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3149                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3150                                  PIPE_CONFIG(ADDR_SURF_P2) |
3151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3154                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3158                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3161                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3162                                  PIPE_CONFIG(ADDR_SURF_P2) |
3163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3178                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3182                                  PIPE_CONFIG(ADDR_SURF_P2) |
3183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3186                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3190                                  PIPE_CONFIG(ADDR_SURF_P2) |
3191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194                                  PIPE_CONFIG(ADDR_SURF_P2) |
3195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205
3206                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3209                                 NUM_BANKS(ADDR_SURF_8_BANK));
3210                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213                                 NUM_BANKS(ADDR_SURF_8_BANK));
3214                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3217                                 NUM_BANKS(ADDR_SURF_8_BANK));
3218                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221                                 NUM_BANKS(ADDR_SURF_8_BANK));
3222                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225                                 NUM_BANKS(ADDR_SURF_8_BANK));
3226                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229                                 NUM_BANKS(ADDR_SURF_8_BANK));
3230                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233                                 NUM_BANKS(ADDR_SURF_8_BANK));
3234                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237                                 NUM_BANKS(ADDR_SURF_16_BANK));
3238                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241                                 NUM_BANKS(ADDR_SURF_16_BANK));
3242                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3243                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245                                  NUM_BANKS(ADDR_SURF_16_BANK));
3246                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3248                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249                                  NUM_BANKS(ADDR_SURF_16_BANK));
3250                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253                                  NUM_BANKS(ADDR_SURF_16_BANK));
3254                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257                                  NUM_BANKS(ADDR_SURF_16_BANK));
3258                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261                                  NUM_BANKS(ADDR_SURF_8_BANK));
3262
3263                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3264                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3265                             reg_offset != 23)
3266                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3267
3268                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3269                         if (reg_offset != 7)
3270                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3271
3272                 break;
3273         default:
3274                 dev_warn(adev->dev,
3275                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3276                          adev->asic_type);
3277
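              /* deliberately fall through to the CHIP_CARRIZO programming */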
3278         case CHIP_CARRIZO:
3279                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280                                 PIPE_CONFIG(ADDR_SURF_P2) |
3281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284                                 PIPE_CONFIG(ADDR_SURF_P2) |
3285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                 PIPE_CONFIG(ADDR_SURF_P2) |
3293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300                                 PIPE_CONFIG(ADDR_SURF_P2) |
3301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304                                 PIPE_CONFIG(ADDR_SURF_P2) |
3305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3308                                 PIPE_CONFIG(ADDR_SURF_P2));
3309                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2) |
3311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314                                  PIPE_CONFIG(ADDR_SURF_P2) |
3315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3318                                  PIPE_CONFIG(ADDR_SURF_P2) |
3319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3321                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322                                  PIPE_CONFIG(ADDR_SURF_P2) |
3323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3326                                  PIPE_CONFIG(ADDR_SURF_P2) |
3327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3330                                  PIPE_CONFIG(ADDR_SURF_P2) |
3331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3334                                  PIPE_CONFIG(ADDR_SURF_P2) |
3335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3337                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338                                  PIPE_CONFIG(ADDR_SURF_P2) |
3339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3354                                  PIPE_CONFIG(ADDR_SURF_P2) |
3355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3358                                  PIPE_CONFIG(ADDR_SURF_P2) |
3359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3362                                  PIPE_CONFIG(ADDR_SURF_P2) |
3363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3366                                  PIPE_CONFIG(ADDR_SURF_P2) |
3367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3370                                  PIPE_CONFIG(ADDR_SURF_P2) |
3371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374                                  PIPE_CONFIG(ADDR_SURF_P2) |
3375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3378                                  PIPE_CONFIG(ADDR_SURF_P2) |
3379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3381
3382                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385                                 NUM_BANKS(ADDR_SURF_8_BANK));
3386                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389                                 NUM_BANKS(ADDR_SURF_8_BANK));
3390                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393                                 NUM_BANKS(ADDR_SURF_8_BANK));
3394                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397                                 NUM_BANKS(ADDR_SURF_8_BANK));
3398                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401                                 NUM_BANKS(ADDR_SURF_8_BANK));
3402                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405                                 NUM_BANKS(ADDR_SURF_8_BANK));
3406                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409                                 NUM_BANKS(ADDR_SURF_8_BANK));
3410                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413                                 NUM_BANKS(ADDR_SURF_16_BANK));
3414                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                 NUM_BANKS(ADDR_SURF_16_BANK));
3418                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421                                  NUM_BANKS(ADDR_SURF_16_BANK));
3422                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425                                  NUM_BANKS(ADDR_SURF_16_BANK));
3426                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429                                  NUM_BANKS(ADDR_SURF_16_BANK));
3430                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433                                  NUM_BANKS(ADDR_SURF_16_BANK));
3434                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437                                  NUM_BANKS(ADDR_SURF_8_BANK));
3438
3439                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3440                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3441                             reg_offset != 23)
3442                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3443
3444                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3445                         if (reg_offset != 7)
3446                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3447
3448                 break;
3449         }
3450 }
3451
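     /*
      * Select which shader engine (se_num), shader array (sh_num) and
      * instance subsequent indexed register accesses apply to, via
      * GRBM_GFX_INDEX.  Passing 0xffffffff for a field enables broadcast
      * writes for that field so all units are addressed at once.
      */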
3452 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3453                                   u32 se_num, u32 sh_num, u32 instance)
3454 {
3455         u32 data;
3456
3457         if (instance == 0xffffffff)
3458                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3459         else
3460                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3461
3462         if (se_num == 0xffffffff)
3463                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3464         else
3465                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3466
3467         if (sh_num == 0xffffffff)
3468                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3469         else
3470                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3471
3472         WREG32(mmGRBM_GFX_INDEX, data);
3473 }
3474
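     /* Return a mask with the lowest @bit_width bits set. */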
3475 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3476 {
3477         return (u32)((1ULL << bit_width) - 1);
3478 }
3479
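     /*
      * Return the bitmap of active render backends (RBs) for the currently
      * selected SE/SH: the disable bits from CC_RB_BACKEND_DISABLE and
      * GC_USER_RB_BACKEND_DISABLE are OR'ed, inverted and masked to the
      * number of backends per shader array.
      */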
3480 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3481 {
3482         u32 data, mask;
3483
3484         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3485                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3486
3487         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3488
3489         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3490                                        adev->gfx.config.max_sh_per_se);
3491
3492         return (~data) & mask;
3493 }
3494
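     /*
      * Default (unharvested) PA_SC_RASTER_CONFIG/_1 values per ASIC; these
      * constants appear to be the golden settings for fully populated parts.
      */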
3495 static void
3496 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3497 {
3498         switch (adev->asic_type) {
3499         case CHIP_FIJI:
3500                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3501                           RB_XSEL2(1) | PKR_MAP(2) |
3502                           PKR_XSEL(1) | PKR_YSEL(1) |
3503                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3504                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3505                            SE_PAIR_YSEL(2);
3506                 break;
3507         case CHIP_TONGA:
3508         case CHIP_POLARIS10:
3509                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3510                           SE_XSEL(1) | SE_YSEL(1);
3511                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3512                            SE_PAIR_YSEL(2);
3513                 break;
3514         case CHIP_TOPAZ:
3515         case CHIP_CARRIZO:
3516                 *rconf |= RB_MAP_PKR0(2);
3517                 *rconf1 |= 0x0;
3518                 break;
3519         case CHIP_POLARIS11:
3520                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521                           SE_XSEL(1) | SE_YSEL(1);
3522                 *rconf1 |= 0x0;
3523                 break;
3524         case CHIP_STONEY:
3525                 *rconf |= 0x0;
3526                 *rconf1 |= 0x0;
3527                 break;
3528         default:
3529                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530                 break;
3531         }
3532 }
3533
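     /*
      * Rewrite the raster configuration when some render backends are
      * harvested: for each SE the SE/PKR/RB map fields are adjusted so only
      * RBs present in @rb_mask are used, and the per-SE value is written
      * with GRBM_GFX_INDEX selecting that SE.
      */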
3534 static void
3535 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536                                         u32 raster_config, u32 raster_config_1,
3537                                         unsigned rb_mask, unsigned num_rb)
3538 {
3539         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542         unsigned rb_per_se = num_rb / num_se;
3543         unsigned se_mask[4];
3544         unsigned se;
3545
3546         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3550
3551         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3554
3555         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556                              (!se_mask[2] && !se_mask[3]))) {
3557                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3558
3559                 if (!se_mask[0] && !se_mask[1]) {
3560                         raster_config_1 |=
3561                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3562                 } else {
3563                         raster_config_1 |=
3564                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3565                 }
3566         }
3567
3568         for (se = 0; se < num_se; se++) {
3569                 unsigned raster_config_se = raster_config;
3570                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572                 int idx = (se / 2) * 2;
3573
3574                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575                         raster_config_se &= ~SE_MAP_MASK;
3576
3577                         if (!se_mask[idx]) {
3578                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3579                         } else {
3580                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3581                         }
3582                 }
3583
3584                 pkr0_mask &= rb_mask;
3585                 pkr1_mask &= rb_mask;
3586                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587                         raster_config_se &= ~PKR_MAP_MASK;
3588
3589                         if (!pkr0_mask) {
3590                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3591                         } else {
3592                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3593                         }
3594                 }
3595
3596                 if (rb_per_se >= 2) {
3597                         unsigned rb0_mask = 1 << (se * rb_per_se);
3598                         unsigned rb1_mask = rb0_mask << 1;
3599
3600                         rb0_mask &= rb_mask;
3601                         rb1_mask &= rb_mask;
3602                         if (!rb0_mask || !rb1_mask) {
3603                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3604
3605                                 if (!rb0_mask) {
3606                                         raster_config_se |=
3607                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3608                                 } else {
3609                                         raster_config_se |=
3610                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3611                                 }
3612                         }
3613
3614                         if (rb_per_se > 2) {
3615                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616                                 rb1_mask = rb0_mask << 1;
3617                                 rb0_mask &= rb_mask;
3618                                 rb1_mask &= rb_mask;
3619                                 if (!rb0_mask || !rb1_mask) {
3620                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3621
3622                                         if (!rb0_mask) {
3623                                                 raster_config_se |=
3624                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3625                                         } else {
3626                                                 raster_config_se |=
3627                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3628                                         }
3629                                 }
3630                         }
3631                 }
3632
3633                 /* GRBM_GFX_INDEX has a different offset on VI */
3634                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3637         }
3638
3639         /* GRBM_GFX_INDEX has a different offset on VI */
3640         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3641 }
3642
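     /*
      * Discover the active render backends across all SEs/SHs, cache the
      * bitmap and count in adev->gfx.config, and program
      * PA_SC_RASTER_CONFIG(_1) either with the plain per-ASIC defaults or,
      * when RBs are harvested, with adjusted per-SE values.
      */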
3643 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644 {
3645         int i, j;
3646         u32 data;
3647         u32 raster_config = 0, raster_config_1 = 0;
3648         u32 active_rbs = 0;
3649         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650                                         adev->gfx.config.max_sh_per_se;
3651         unsigned num_rb_pipes;
3652
3653         mutex_lock(&adev->grbm_idx_mutex);
3654         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3658                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659                                                rb_bitmap_width_per_sh);
3660                 }
3661         }
3662         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663
3664         adev->gfx.config.backend_enable_mask = active_rbs;
3665         adev->gfx.config.num_rbs = hweight32(active_rbs);
3666
3667         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668                              adev->gfx.config.max_shader_engines, 16);
3669
3670         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671
3672         if (!adev->gfx.config.backend_enable_mask ||
3673                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3674                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676         } else {
3677                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678                                                         adev->gfx.config.backend_enable_mask,
3679                                                         num_rb_pipes);
3680         }
3681
3682         mutex_unlock(&adev->grbm_idx_mutex);
3683 }
3684
3685 /**
3686  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3687  *
3688  * @adev: amdgpu_device pointer
3689  *
3690  * Initialize the SH_MEM registers (config, apertures and bases) for the
3691  * compute VMIDs (8..15).
3692  */
3693 #define DEFAULT_SH_MEM_BASES    (0x6000)
3694 #define FIRST_COMPUTE_VMID      (8)
3695 #define LAST_COMPUTE_VMID       (16)
3696 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3697 {
3698         int i;
3699         uint32_t sh_mem_config;
3700         uint32_t sh_mem_bases;
3701
3702         /*
3703          * Configure apertures:
3704          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3705          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3706          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3707          */
3708         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3709
3710         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3711                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3712                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3713                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3714                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3715                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3716
3717         mutex_lock(&adev->srbm_mutex);
3718         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3719                 vi_srbm_select(adev, 0, 0, 0, i);
3720                 /* CP and shaders */
3721                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3722                 WREG32(mmSH_MEM_APE1_BASE, 1);
3723                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3724                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3725         }
3726         vi_srbm_select(adev, 0, 0, 0, 0);
3727         mutex_unlock(&adev->srbm_mutex);
3728 }
3729
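     /*
      * One-time gfx block setup: address config registers, tiling tables,
      * render backends, CU info, per-VMID SH_MEM apertures and the PA_SC
      * primitive FIFO sizes.
      */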
3730 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3731 {
3732         u32 tmp;
3733         int i;
3734
3735         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3736         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3737         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3739
3740         gfx_v8_0_tiling_mode_table_init(adev);
3741         gfx_v8_0_setup_rb(adev);
3742         gfx_v8_0_get_cu_info(adev);
3743
3744         /* XXX SH_MEM regs */
3745         /* where to put LDS, scratch, GPUVM in FSA64 space */
3746         mutex_lock(&adev->srbm_mutex);
3747         for (i = 0; i < 16; i++) {
3748                 vi_srbm_select(adev, 0, 0, 0, i);
3749                 /* CP and shaders */
3750                 if (i == 0) {
3751                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3752                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3753                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3754                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3755                         WREG32(mmSH_MEM_CONFIG, tmp);
3756                 } else {
3757                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3758                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3759                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3760                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3761                         WREG32(mmSH_MEM_CONFIG, tmp);
3762                 }
3763
3764                 WREG32(mmSH_MEM_APE1_BASE, 1);
3765                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3766                 WREG32(mmSH_MEM_BASES, 0);
3767         }
3768         vi_srbm_select(adev, 0, 0, 0, 0);
3769         mutex_unlock(&adev->srbm_mutex);
3770
3771         gfx_v8_0_init_compute_vmid(adev);
3772
3773         mutex_lock(&adev->grbm_idx_mutex);
3774         /*
3775          * make sure that the following register writes are broadcast to
3776          * all of the shader engines and arrays
3777          */
3778         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3779
3780         WREG32(mmPA_SC_FIFO_SIZE,
3781                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3782                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3783                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3784                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3785                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3786                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3787                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3788                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3789         mutex_unlock(&adev->grbm_idx_mutex);
3790
3791 }
3792
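     /*
      * Poll until the RLC SERDES masters report idle: first the CU masters
      * per SE/SH, then the non-CU masters, each bounded by
      * adev->usec_timeout with a 1us delay between polls.
      */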
3793 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3794 {
3795         u32 i, j, k;
3796         u32 mask;
3797
3798         mutex_lock(&adev->grbm_idx_mutex);
3799         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3800                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3801                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3802                         for (k = 0; k < adev->usec_timeout; k++) {
3803                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3804                                         break;
3805                                 udelay(1);
3806                         }
3807                 }
3808         }
3809         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3810         mutex_unlock(&adev->grbm_idx_mutex);
3811
3812         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3813                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3814                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3815                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3816         for (k = 0; k < adev->usec_timeout; k++) {
3817                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3818                         break;
3819                 udelay(1);
3820         }
3821 }
3822
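     /*
      * Enable or disable the ring0 context busy/empty, compute busy and
      * GFX idle interrupt sources in CP_INT_CNTL_RING0.
      */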
3823 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3824                                                bool enable)
3825 {
3826         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3827
3828         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3829         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3830         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3831         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3832
3833         WREG32(mmCP_INT_CNTL_RING0, tmp);
3834 }
3835
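     /* Point the RLC at the clear state indirect buffer (CSIB). */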
3836 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3837 {
3838         /* csib */
3839         WREG32(mmRLC_CSIB_ADDR_HI,
3840                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3841         WREG32(mmRLC_CSIB_ADDR_LO,
3842                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3843         WREG32(mmRLC_CSIB_LENGTH,
3844                         adev->gfx.rlc.clear_state_size);
3845 }
3846
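     /*
      * Walk the RLC register-list-format blob: record where each indirect
      * entry starts, collect the unique index registers it references, and
      * rewrite each reference in place with its slot in unique_indices.
      */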
3847 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3848                                 int ind_offset,
3849                                 int list_size,
3850                                 int *unique_indices,
3851                                 int *indices_count,
3852                                 int max_indices,
3853                                 int *ind_start_offsets,
3854                                 int *offset_count,
3855                                 int max_offset)
3856 {
3857         int indices;
3858         bool new_entry = true;
3859
3860         for (; ind_offset < list_size; ind_offset++) {
3861
3862                 if (new_entry) {
3863                         new_entry = false;
3864                         ind_start_offsets[*offset_count] = ind_offset;
3865                         *offset_count = *offset_count + 1;
3866                         BUG_ON(*offset_count >= max_offset);
3867                 }
3868
3869                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3870                         new_entry = true;
3871                         continue;
3872                 }
3873
3874                 ind_offset += 2;
3875
3876                 /* look for a matching index */
3877                 for (indices = 0;
3878                         indices < *indices_count;
3879                         indices++) {
3880                         if (unique_indices[indices] ==
3881                                 register_list_format[ind_offset])
3882                                 break;
3883                 }
3884
3885                 if (indices >= *indices_count) {
3886                         unique_indices[*indices_count] =
3887                                 register_list_format[ind_offset];
3888                         indices = *indices_count;
3889                         *indices_count = *indices_count + 1;
3890                         BUG_ON(*indices_count >= max_indices);
3891                 }
3892
3893                 register_list_format[ind_offset] = indices;
3894         }
3895 }
3896
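     /*
      * Upload the RLC save/restore lists: the direct restore list goes to
      * ARAM, the rewritten indirect format list plus its size and starting
      * offsets go to GPM scratch, and the unique index registers are
      * programmed through the SRM index control address/data pairs.
      */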
3897 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3898 {
3899         int i, temp, data;
3900         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3901         int indices_count = 0;
3902         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3903         int offset_count = 0;
3904
3905         int list_size;
3906         unsigned int *register_list_format =
3907                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3908         if (register_list_format == NULL)
3909                 return -ENOMEM;
3910         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3911                         adev->gfx.rlc.reg_list_format_size_bytes);
3912
3913         gfx_v8_0_parse_ind_reg_list(register_list_format,
3914                                 RLC_FormatDirectRegListLength,
3915                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3916                                 unique_indices,
3917                                 &indices_count,
3918                                 sizeof(unique_indices) / sizeof(int),
3919                                 indirect_start_offsets,
3920                                 &offset_count,
3921                                 sizeof(indirect_start_offsets)/sizeof(int));
3922
3923         /* save and restore list */
3924         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3925
3926         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3927         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3928                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3929
3930         /* indirect list */
3931         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3932         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3933                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3934
3935         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3936         list_size = list_size >> 1;
3937         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3938         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3939
3940         /* write the starting offsets */
3941         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3942                 adev->gfx.rlc.starting_offsets_start);
3943         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3944                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3945                                 indirect_start_offsets[i]);
3946
3947         /* unique indices */
3948         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3949         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3950         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3951                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3952                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3953         }
3954         kfree(register_list_format);
3955
3956         return 0;
3957 }
3958
3959 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3960 {
3961         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3962 }
3963
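     /*
      * Program the power-gating delays and idle threshold used when static,
      * SMG or DMG gfx power gating is enabled; the delay values below are
      * presumably tuned hardware defaults.
      */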
3964 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3965 {
3966         uint32_t data;
3967
3968         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3969                               AMD_PG_SUPPORT_GFX_SMG |
3970                               AMD_PG_SUPPORT_GFX_DMG)) {
3971                 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3972
3973                 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3974                 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3975                 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3976                 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3977                 WREG32(mmRLC_PG_DELAY, data);
3978
3979                 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3980                 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3981         }
3982 }
3983
3984 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3985                                                 bool enable)
3986 {
3987         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3988 }
3989
3990 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3991                                                   bool enable)
3992 {
3993         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3994 }
3995
3996 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3997 {
3998         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3999 }
4000
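     /*
      * Top-level power-gating init: set up the clear state buffer and the
      * save/restore machine, then apply the APU (Carrizo/Stoney) or
      * Polaris11 specific PG configuration according to adev->pg_flags.
      */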
4001 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4002 {
4003         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4004                               AMD_PG_SUPPORT_GFX_SMG |
4005                               AMD_PG_SUPPORT_GFX_DMG |
4006                               AMD_PG_SUPPORT_CP |
4007                               AMD_PG_SUPPORT_GDS |
4008                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
4009                 gfx_v8_0_init_csb(adev);
4010                 gfx_v8_0_init_save_restore_list(adev);
4011                 gfx_v8_0_enable_save_restore_machine(adev);
4012
4013                 if ((adev->asic_type == CHIP_CARRIZO) ||
4014                     (adev->asic_type == CHIP_STONEY)) {
4015                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4016                         gfx_v8_0_init_power_gating(adev);
4017                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4018                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4019                                 cz_enable_sck_slow_down_on_power_up(adev, true);
4020                                 cz_enable_sck_slow_down_on_power_down(adev, true);
4021                         } else {
4022                                 cz_enable_sck_slow_down_on_power_up(adev, false);
4023                                 cz_enable_sck_slow_down_on_power_down(adev, false);
4024                         }
4025                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4026                                 cz_enable_cp_power_gating(adev, true);
4027                         else
4028                                 cz_enable_cp_power_gating(adev, false);
4029                 } else if (adev->asic_type == CHIP_POLARIS11) {
4030                         gfx_v8_0_init_power_gating(adev);
4031                 }
4032         }
4033 }
4034
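     /* Halt the RLC and wait for its SERDES masters to go idle. */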
4035 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4036 {
4037         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4038
4039         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4040         gfx_v8_0_wait_for_rlc_serdes(adev);
4041 }
4042
4043 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4044 {
4045         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4046         udelay(50);
4047
4048         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4049         udelay(50);
4050 }
4051
4052 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4053 {
4054         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4055
4056         /* APUs (e.g. Carrizo) enable the CP interrupt only after the CP is initialized */
4057         if (!(adev->flags & AMD_IS_APU))
4058                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4059
4060         udelay(50);
4061 }
4062
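     /*
      * Legacy (non-SMU) RLC microcode load: stream the ucode words into
      * RLC_GPM_UCODE_DATA and finish by writing the firmware version to
      * the address register.
      */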
4063 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4064 {
4065         const struct rlc_firmware_header_v2_0 *hdr;
4066         const __le32 *fw_data;
4067         unsigned i, fw_size;
4068
4069         if (!adev->gfx.rlc_fw)
4070                 return -EINVAL;
4071
4072         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4073         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4074
4075         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4076                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4077         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4078
4079         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4080         for (i = 0; i < fw_size; i++)
4081                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4082         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4083
4084         return 0;
4085 }
4086
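     /*
      * Full RLC bring-up: stop the RLC, disable CG and PG, reset it,
      * reapply the PG setup, load the RLC microcode when neither powerplay
      * nor the SMU loader handles it, and finally restart the RLC.
      */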
4087 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4088 {
4089         int r;
4090
4091         gfx_v8_0_rlc_stop(adev);
4092
4093         /* disable CG */
4094         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
4095         if (adev->asic_type == CHIP_POLARIS11 ||
4096             adev->asic_type == CHIP_POLARIS10)
4097                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
4098
4099         /* disable PG */
4100         WREG32(mmRLC_PG_CNTL, 0);
4101
4102         gfx_v8_0_rlc_reset(adev);
4103         gfx_v8_0_init_pg(adev);
4104
4105         if (!adev->pp_enabled) {
4106                 if (!adev->firmware.smu_load) {
4107                         /* legacy rlc firmware loading */
4108                         r = gfx_v8_0_rlc_load_microcode(adev);
4109                         if (r)
4110                                 return r;
4111                 } else {
4112                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4113                                                         AMDGPU_UCODE_ID_RLC_G);
4114                         if (r)
4115                                 return -EINVAL;
4116                 }
4117         }
4118
4119         gfx_v8_0_rlc_start(adev);
4120
4121         return 0;
4122 }
4123
4124 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4125 {
4126         int i;
4127         u32 tmp = RREG32(mmCP_ME_CNTL);
4128
4129         if (enable) {
4130                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4131                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4132                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4133         } else {
4134                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4135                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4136                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4137                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4138                         adev->gfx.gfx_ring[i].ready = false;
4139         }
4140         WREG32(mmCP_ME_CNTL, tmp);
4141         udelay(50);
4142 }
4143
4144 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4145 {
4146         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4147         const struct gfx_firmware_header_v1_0 *ce_hdr;
4148         const struct gfx_firmware_header_v1_0 *me_hdr;
4149         const __le32 *fw_data;
4150         unsigned i, fw_size;
4151
4152         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4153                 return -EINVAL;
4154
4155         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4156                 adev->gfx.pfp_fw->data;
4157         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4158                 adev->gfx.ce_fw->data;
4159         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4160                 adev->gfx.me_fw->data;
4161
4162         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4163         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4164         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4165
4166         gfx_v8_0_cp_gfx_enable(adev, false);
4167
4168         /* PFP */
4169         fw_data = (const __le32 *)
4170                 (adev->gfx.pfp_fw->data +
4171                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4172         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4173         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4174         for (i = 0; i < fw_size; i++)
4175                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4176         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4177
4178         /* CE */
4179         fw_data = (const __le32 *)
4180                 (adev->gfx.ce_fw->data +
4181                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4182         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4183         WREG32(mmCP_CE_UCODE_ADDR, 0);
4184         for (i = 0; i < fw_size; i++)
4185                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4186         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4187
4188         /* ME */
4189         fw_data = (const __le32 *)
4190                 (adev->gfx.me_fw->data +
4191                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4192         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4193         WREG32(mmCP_ME_RAM_WADDR, 0);
4194         for (i = 0; i < fw_size; i++)
4195                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4196         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4197
4198         return 0;
4199 }
4200
4201 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4202 {
4203         u32 count = 0;
4204         const struct cs_section_def *sect = NULL;
4205         const struct cs_extent_def *ext = NULL;
4206
4207         /* begin clear state */
4208         count += 2;
4209         /* context control state */
4210         count += 3;
4211
4212         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4213                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4214                         if (sect->id == SECT_CONTEXT)
4215                                 count += 2 + ext->reg_count;
4216                         else
4217                                 return 0;
4218                 }
4219         }
4220         /* pa_sc_raster_config/pa_sc_raster_config1 */
4221         count += 4;
4222         /* end clear state */
4223         count += 2;
4224         /* clear state */
4225         count += 2;
4226
4227         return count;
4228 }
4229
4230 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4231 {
4232         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4233         const struct cs_section_def *sect = NULL;
4234         const struct cs_extent_def *ext = NULL;
4235         int r, i;
4236
4237         /* init the CP */
4238         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4239         WREG32(mmCP_ENDIAN_SWAP, 0);
4240         WREG32(mmCP_DEVICE_ID, 1);
4241
4242         gfx_v8_0_cp_gfx_enable(adev, true);
4243
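             /* the extra 4 dwords hold the SET_BASE packet that programs the CE partitions at the end of the preamble */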
4244         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4245         if (r) {
4246                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4247                 return r;
4248         }
4249
4250         /* clear state buffer */
4251         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4252         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4253
4254         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4255         amdgpu_ring_write(ring, 0x80000000);
4256         amdgpu_ring_write(ring, 0x80000000);
4257
4258         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4259                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4260                         if (sect->id == SECT_CONTEXT) {
4261                                 amdgpu_ring_write(ring,
4262                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4263                                                ext->reg_count));
4264                                 amdgpu_ring_write(ring,
4265                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4266                                 for (i = 0; i < ext->reg_count; i++)
4267                                         amdgpu_ring_write(ring, ext->extent[i]);
4268                         }
4269                 }
4270         }
4271
4272         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4273         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4274         switch (adev->asic_type) {
4275         case CHIP_TONGA:
4276         case CHIP_POLARIS10:
4277                 amdgpu_ring_write(ring, 0x16000012);
4278                 amdgpu_ring_write(ring, 0x0000002A);
4279                 break;
4280         case CHIP_POLARIS11:
4281                 amdgpu_ring_write(ring, 0x16000012);
4282                 amdgpu_ring_write(ring, 0x00000000);
4283                 break;
4284         case CHIP_FIJI:
4285                 amdgpu_ring_write(ring, 0x3a00161a);
4286                 amdgpu_ring_write(ring, 0x0000002e);
4287                 break;
4288         case CHIP_CARRIZO:
4289                 amdgpu_ring_write(ring, 0x00000002);
4290                 amdgpu_ring_write(ring, 0x00000000);
4291                 break;
4292         case CHIP_TOPAZ:
4293                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4294                                 0x00000000 : 0x00000002);
4295                 amdgpu_ring_write(ring, 0x00000000);
4296                 break;
4297         case CHIP_STONEY:
4298                 amdgpu_ring_write(ring, 0x00000000);
4299                 amdgpu_ring_write(ring, 0x00000000);
4300                 break;
4301         default:
4302                 BUG();
4303         }
4304
4305         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4306         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4307
4308         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4309         amdgpu_ring_write(ring, 0);
4310
4311         /* init the CE partitions */
4312         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4313         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4314         amdgpu_ring_write(ring, 0x8000);
4315         amdgpu_ring_write(ring, 0x8000);
4316
4317         amdgpu_ring_commit(ring);
4318
4319         return 0;
4320 }
4321
4322 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4323 {
4324         struct amdgpu_ring *ring;
4325         u32 tmp;
4326         u32 rb_bufsz;
4327         u64 rb_addr, rptr_addr;
4328         int r;
4329
4330         /* Set the write pointer delay */
4331         WREG32(mmCP_RB_WPTR_DELAY, 0);
4332
4333         /* set the RB to use vmid 0 */
4334         WREG32(mmCP_RB_VMID, 0);
4335
4336         /* Set ring buffer size */
4337         ring = &adev->gfx.gfx_ring[0];
4338         rb_bufsz = order_base_2(ring->ring_size / 8);
4339         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4340         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4341         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4342         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4343 #ifdef __BIG_ENDIAN
4344         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4345 #endif
4346         WREG32(mmCP_RB0_CNTL, tmp);
4347
4348         /* Initialize the ring buffer's read and write pointers */
4349         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
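             /* RB_RPTR_WR_ENA is cleared again below, once the pointers and write-back address are programmed */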
4350         ring->wptr = 0;
4351         WREG32(mmCP_RB0_WPTR, ring->wptr);
4352
4353         /* set the wb address whether it's enabled or not */
4354         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4355         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4356         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4357
4358         mdelay(1);
4359         WREG32(mmCP_RB0_CNTL, tmp);
4360
4361         rb_addr = ring->gpu_addr >> 8;
4362         WREG32(mmCP_RB0_BASE, rb_addr);
4363         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4364
4365         /* no gfx doorbells on iceland */
4366         if (adev->asic_type != CHIP_TOPAZ) {
4367                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4368                 if (ring->use_doorbell) {
4369                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4370                                             DOORBELL_OFFSET, ring->doorbell_index);
4371                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4372                                             DOORBELL_HIT, 0);
4373                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4374                                             DOORBELL_EN, 1);
4375                 } else {
4376                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4377                                             DOORBELL_EN, 0);
4378                 }
4379                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4380
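                     /* CP_RB_DOORBELL_RANGE_LOWER/UPPER bound the doorbell offsets the CP routes to the gfx ring */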
4381                 if (adev->asic_type == CHIP_TONGA) {
4382                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4383                                             DOORBELL_RANGE_LOWER,
4384                                             AMDGPU_DOORBELL_GFX_RING0);
4385                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4386
4387                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4388                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4389                 }
4390
4391         }
4392
4393         /* start the ring */
4394         gfx_v8_0_cp_gfx_start(adev);
4395         ring->ready = true;
4396         r = amdgpu_ring_test_ring(ring);
4397         if (r)
4398                 ring->ready = false;
4399
4400         return r;
4401 }
4402
4403 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4404 {
4405         int i;
4406
4407         if (enable) {
4408                 WREG32(mmCP_MEC_CNTL, 0);
4409         } else {
4410                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4411                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4412                         adev->gfx.compute_ring[i].ready = false;
4413         }
4414         udelay(50);
4415 }
4416
4417 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4418 {
4419         const struct gfx_firmware_header_v1_0 *mec_hdr;
4420         const __le32 *fw_data;
4421         unsigned i, fw_size;
4422
4423         if (!adev->gfx.mec_fw)
4424                 return -EINVAL;
4425
4426         gfx_v8_0_cp_compute_enable(adev, false);
4427
4428         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4429         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4430
4431         fw_data = (const __le32 *)
4432                 (adev->gfx.mec_fw->data +
4433                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4434         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4435
4436         /* MEC1 */
4437         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4438         for (i = 0; i < fw_size; i++)
4439                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4440         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4441
4442         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4443         if (adev->gfx.mec2_fw) {
4444                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4445
4446                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4447                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4448
4449                 fw_data = (const __le32 *)
4450                         (adev->gfx.mec2_fw->data +
4451                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4452                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4453
4454                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4455                 for (i = 0; i < fw_size; i++)
4456                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4457                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4458         }
4459
4460         return 0;
4461 }
4462
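     /*
      * Memory Queue Descriptor (MQD) for VI compute queues.  Most fields
      * shadow the corresponding CP_HQD_* registers; the driver fills the
      * structure in and hands its GPU address to the CP via
      * CP_MQD_BASE_ADDR, so the layout (256 dwords plus a 256 dword area
      * reserved for the ucode) must not be changed.
      */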
4463 struct vi_mqd {
4464         uint32_t header;  /* ordinal0 */
4465         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4466         uint32_t compute_dim_x;  /* ordinal2 */
4467         uint32_t compute_dim_y;  /* ordinal3 */
4468         uint32_t compute_dim_z;  /* ordinal4 */
4469         uint32_t compute_start_x;  /* ordinal5 */
4470         uint32_t compute_start_y;  /* ordinal6 */
4471         uint32_t compute_start_z;  /* ordinal7 */
4472         uint32_t compute_num_thread_x;  /* ordinal8 */
4473         uint32_t compute_num_thread_y;  /* ordinal9 */
4474         uint32_t compute_num_thread_z;  /* ordinal10 */
4475         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4476         uint32_t compute_perfcount_enable;  /* ordinal12 */
4477         uint32_t compute_pgm_lo;  /* ordinal13 */
4478         uint32_t compute_pgm_hi;  /* ordinal14 */
4479         uint32_t compute_tba_lo;  /* ordinal15 */
4480         uint32_t compute_tba_hi;  /* ordinal16 */
4481         uint32_t compute_tma_lo;  /* ordinal17 */
4482         uint32_t compute_tma_hi;  /* ordinal18 */
4483         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4484         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4485         uint32_t compute_vmid;  /* ordinal21 */
4486         uint32_t compute_resource_limits;  /* ordinal22 */
4487         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4488         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4489         uint32_t compute_tmpring_size;  /* ordinal25 */
4490         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4491         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4492         uint32_t compute_restart_x;  /* ordinal28 */
4493         uint32_t compute_restart_y;  /* ordinal29 */
4494         uint32_t compute_restart_z;  /* ordinal30 */
4495         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4496         uint32_t compute_misc_reserved;  /* ordinal32 */
4497         uint32_t compute_dispatch_id;  /* ordinal33 */
4498         uint32_t compute_threadgroup_id;  /* ordinal34 */
4499         uint32_t compute_relaunch;  /* ordinal35 */
4500         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4501         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4502         uint32_t compute_wave_restore_control;  /* ordinal38 */
4503         uint32_t reserved9;  /* ordinal39 */
4504         uint32_t reserved10;  /* ordinal40 */
4505         uint32_t reserved11;  /* ordinal41 */
4506         uint32_t reserved12;  /* ordinal42 */
4507         uint32_t reserved13;  /* ordinal43 */
4508         uint32_t reserved14;  /* ordinal44 */
4509         uint32_t reserved15;  /* ordinal45 */
4510         uint32_t reserved16;  /* ordinal46 */
4511         uint32_t reserved17;  /* ordinal47 */
4512         uint32_t reserved18;  /* ordinal48 */
4513         uint32_t reserved19;  /* ordinal49 */
4514         uint32_t reserved20;  /* ordinal50 */
4515         uint32_t reserved21;  /* ordinal51 */
4516         uint32_t reserved22;  /* ordinal52 */
4517         uint32_t reserved23;  /* ordinal53 */
4518         uint32_t reserved24;  /* ordinal54 */
4519         uint32_t reserved25;  /* ordinal55 */
4520         uint32_t reserved26;  /* ordinal56 */
4521         uint32_t reserved27;  /* ordinal57 */
4522         uint32_t reserved28;  /* ordinal58 */
4523         uint32_t reserved29;  /* ordinal59 */
4524         uint32_t reserved30;  /* ordinal60 */
4525         uint32_t reserved31;  /* ordinal61 */
4526         uint32_t reserved32;  /* ordinal62 */
4527         uint32_t reserved33;  /* ordinal63 */
4528         uint32_t reserved34;  /* ordinal64 */
4529         uint32_t compute_user_data_0;  /* ordinal65 */
4530         uint32_t compute_user_data_1;  /* ordinal66 */
4531         uint32_t compute_user_data_2;  /* ordinal67 */
4532         uint32_t compute_user_data_3;  /* ordinal68 */
4533         uint32_t compute_user_data_4;  /* ordinal69 */
4534         uint32_t compute_user_data_5;  /* ordinal70 */
4535         uint32_t compute_user_data_6;  /* ordinal71 */
4536         uint32_t compute_user_data_7;  /* ordinal72 */
4537         uint32_t compute_user_data_8;  /* ordinal73 */
4538         uint32_t compute_user_data_9;  /* ordinal74 */
4539         uint32_t compute_user_data_10;  /* ordinal75 */
4540         uint32_t compute_user_data_11;  /* ordinal76 */
4541         uint32_t compute_user_data_12;  /* ordinal77 */
4542         uint32_t compute_user_data_13;  /* ordinal78 */
4543         uint32_t compute_user_data_14;  /* ordinal79 */
4544         uint32_t compute_user_data_15;  /* ordinal80 */
4545         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4546         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4547         uint32_t reserved35;  /* ordinal83 */
4548         uint32_t reserved36;  /* ordinal84 */
4549         uint32_t reserved37;  /* ordinal85 */
4550         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4551         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4552         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4553         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4554         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4555         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4556         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4557         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4558         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4559         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4560         uint32_t reserved38;  /* ordinal96 */
4561         uint32_t reserved39;  /* ordinal97 */
4562         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4563         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4564         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4565         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4566         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4567         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4568         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4569         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4570         uint32_t reserved40;  /* ordinal106 */
4571         uint32_t reserved41;  /* ordinal107 */
4572         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4573         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4574         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4575         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4576         uint32_t reserved42;  /* ordinal112 */
4577         uint32_t reserved43;  /* ordinal113 */
4578         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4579         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4580         uint32_t cp_packet_id_lo;  /* ordinal116 */
4581         uint32_t cp_packet_id_hi;  /* ordinal117 */
4582         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4583         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4584         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4585         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4586         uint32_t gds_save_mask_lo;  /* ordinal122 */
4587         uint32_t gds_save_mask_hi;  /* ordinal123 */
4588         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4589         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4590         uint32_t reserved44;  /* ordinal126 */
4591         uint32_t reserved45;  /* ordinal127 */
4592         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4593         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4594         uint32_t cp_hqd_active;  /* ordinal130 */
4595         uint32_t cp_hqd_vmid;  /* ordinal131 */
4596         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4597         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4598         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4599         uint32_t cp_hqd_quantum;  /* ordinal135 */
4600         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4601         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4602         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4603         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4604         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4605         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4606         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4607         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4608         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4609         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4610         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4611         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4612         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4613         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4614         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4615         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4616         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4617         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4618         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4619         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4620         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4621         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4622         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4623         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4624         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4625         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4626         uint32_t cp_mqd_control;  /* ordinal162 */
4627         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4628         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4629         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4630         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4631         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4632         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4633         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4634         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4635         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4636         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4637         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4638         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4639         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4640         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4641         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4642         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4643         uint32_t cp_hqd_error;  /* ordinal179 */
4644         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4645         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4646         uint32_t reserved46;  /* ordinal182 */
4647         uint32_t reserved47;  /* ordinal183 */
4648         uint32_t reserved48;  /* ordinal184 */
4649         uint32_t reserved49;  /* ordinal185 */
4650         uint32_t reserved50;  /* ordinal186 */
4651         uint32_t reserved51;  /* ordinal187 */
4652         uint32_t reserved52;  /* ordinal188 */
4653         uint32_t reserved53;  /* ordinal189 */
4654         uint32_t reserved54;  /* ordinal190 */
4655         uint32_t reserved55;  /* ordinal191 */
4656         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4657         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4658         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4659         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4660         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4661         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4662         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4663         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4664         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4665         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4666         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4667         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4668         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4669         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4670         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4671         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4672         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4673         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4674         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4675         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4676         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4677         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4678         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4679         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4680         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4681         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4682         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4683         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4684         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4685         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4686         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4687         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4688         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4689         uint32_t reserved56;  /* ordinal225 */
4690         uint32_t reserved57;  /* ordinal226 */
4691         uint32_t reserved58;  /* ordinal227 */
4692         uint32_t set_resources_header;  /* ordinal228 */
4693         uint32_t set_resources_dw1;  /* ordinal229 */
4694         uint32_t set_resources_dw2;  /* ordinal230 */
4695         uint32_t set_resources_dw3;  /* ordinal231 */
4696         uint32_t set_resources_dw4;  /* ordinal232 */
4697         uint32_t set_resources_dw5;  /* ordinal233 */
4698         uint32_t set_resources_dw6;  /* ordinal234 */
4699         uint32_t set_resources_dw7;  /* ordinal235 */
4700         uint32_t reserved59;  /* ordinal236 */
4701         uint32_t reserved60;  /* ordinal237 */
4702         uint32_t reserved61;  /* ordinal238 */
4703         uint32_t reserved62;  /* ordinal239 */
4704         uint32_t reserved63;  /* ordinal240 */
4705         uint32_t reserved64;  /* ordinal241 */
4706         uint32_t reserved65;  /* ordinal242 */
4707         uint32_t reserved66;  /* ordinal243 */
4708         uint32_t reserved67;  /* ordinal244 */
4709         uint32_t reserved68;  /* ordinal245 */
4710         uint32_t reserved69;  /* ordinal246 */
4711         uint32_t reserved70;  /* ordinal247 */
4712         uint32_t reserved71;  /* ordinal248 */
4713         uint32_t reserved72;  /* ordinal249 */
4714         uint32_t reserved73;  /* ordinal250 */
4715         uint32_t reserved74;  /* ordinal251 */
4716         uint32_t reserved75;  /* ordinal252 */
4717         uint32_t reserved76;  /* ordinal253 */
4718         uint32_t reserved77;  /* ordinal254 */
4719         uint32_t reserved78;  /* ordinal255 */
4720
4721         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4722 };
4723
4724 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4725 {
4726         int i, r;
4727
4728         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4729                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4730
4731                 if (ring->mqd_obj) {
4732                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4733                         if (unlikely(r != 0))
4734                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4735
4736                         amdgpu_bo_unpin(ring->mqd_obj);
4737                         amdgpu_bo_unreserve(ring->mqd_obj);
4738
4739                         amdgpu_bo_unref(&ring->mqd_obj);
4740                         ring->mqd_obj = NULL;
4741                 }
4742         }
4743 }
4744
4745 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4746 {
4747         int r, i, j;
4748         u32 tmp;
4749         bool use_doorbell = true;
4750         u64 hqd_gpu_addr;
4751         u64 mqd_gpu_addr;
4752         u64 eop_gpu_addr;
4753         u64 wb_gpu_addr;
4754         u32 *buf;
4755         struct vi_mqd *mqd;
4756
4757         /* init the pipes */
4758         mutex_lock(&adev->srbm_mutex);
4759         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
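                     /* pipes 0-3 belong to MEC1 (ME 1), pipes 4-7 to MEC2 (ME 2) */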
4760                 int me = (i < 4) ? 1 : 2;
4761                 int pipe = (i < 4) ? i : (i - 4);
4762
4763                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4764                 eop_gpu_addr >>= 8;
4765
4766                 vi_srbm_select(adev, me, pipe, 0, 0);
4767
4768                 /* write the EOP addr */
4769                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4770                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4771
4772                 /* set the VMID assigned */
4773                 WREG32(mmCP_HQD_VMID, 0);
4774
4775                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4776                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4777                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4778                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4779                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4780         }
4781         vi_srbm_select(adev, 0, 0, 0, 0);
4782         mutex_unlock(&adev->srbm_mutex);
4783
4784         /* init the compute queues */
4785         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4786                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4787
4788                 if (ring->mqd_obj == NULL) {
4789                         r = amdgpu_bo_create(adev,
4790                                              sizeof(struct vi_mqd),
4791                                              PAGE_SIZE, true,
4792                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4793                                              NULL, &ring->mqd_obj);
4794                         if (r) {
4795                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4796                                 return r;
4797                         }
4798                 }
4799
4800                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4801                 if (unlikely(r != 0)) {
4802                         gfx_v8_0_cp_compute_fini(adev);
4803                         return r;
4804                 }
4805                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4806                                   &mqd_gpu_addr);
4807                 if (r) {
4808                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4809                         gfx_v8_0_cp_compute_fini(adev);
4810                         return r;
4811                 }
4812                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4813                 if (r) {
4814                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4815                         gfx_v8_0_cp_compute_fini(adev);
4816                         return r;
4817                 }
4818
4819                 /* init the mqd struct */
4820                 memset(buf, 0, sizeof(struct vi_mqd));
4821
4822                 mqd = (struct vi_mqd *)buf;
4823                 mqd->header = 0xC0310800;
4824                 mqd->compute_pipelinestat_enable = 0x00000001;
4825                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4826                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4827                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4828                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4829                 mqd->compute_misc_reserved = 0x00000003;
4830
4831                 mutex_lock(&adev->srbm_mutex);
4832                 vi_srbm_select(adev, ring->me,
4833                                ring->pipe,
4834                                ring->queue, 0);
4835
4836                 /* disable wptr polling */
4837                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4838                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4839                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4840
4841                 mqd->cp_hqd_eop_base_addr_lo =
4842                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4843                 mqd->cp_hqd_eop_base_addr_hi =
4844                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4845
4846                 /* enable doorbell? */
4847                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4848                 if (use_doorbell) {
4849                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4850                 } else {
4851                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4852                 }
4853                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4854                 mqd->cp_hqd_pq_doorbell_control = tmp;
4855
4856                 /* disable the queue if it's active */
4857                 mqd->cp_hqd_dequeue_request = 0;
4858                 mqd->cp_hqd_pq_rptr = 0;
4859                 mqd->cp_hqd_pq_wptr = 0;
4860                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4861                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4862                         for (j = 0; j < adev->usec_timeout; j++) {
4863                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4864                                         break;
4865                                 udelay(1);
4866                         }
4867                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4868                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4869                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4870                 }
4871
4872                 /* set the pointer to the MQD */
4873                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4874                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4875                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4876                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4877
4878                 /* set MQD vmid to 0 */
4879                 tmp = RREG32(mmCP_MQD_CONTROL);
4880                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4881                 WREG32(mmCP_MQD_CONTROL, tmp);
4882                 mqd->cp_mqd_control = tmp;
4883
4884                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4885                 hqd_gpu_addr = ring->gpu_addr >> 8;
4886                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4887                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4888                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4889                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4890
4891                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4892                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4893                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4894                                     (order_base_2(ring->ring_size / 4) - 1));
4895                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4896                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4897 #ifdef __BIG_ENDIAN
4898                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4899 #endif
4900                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4901                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4902                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4903                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4904                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4905                 mqd->cp_hqd_pq_control = tmp;
4906
4907                 /* set the wb address whether it's enabled or not */
4908                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4909                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4910                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4911                         upper_32_bits(wb_gpu_addr) & 0xffff;
4912                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4913                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4914                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4915                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4916
4917                 /* only used if CP_PQ_WPTR_POLL_CNTL.EN is set */
4918                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4919                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4920                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4921                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4922                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4923                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4924
4925                 /* enable the doorbell if requested */
4926                 if (use_doorbell) {
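                             /* program the MEC doorbell aperture (KIQ through MEC ring 7) on asics that support it */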
4927                         if ((adev->asic_type == CHIP_CARRIZO) ||
4928                             (adev->asic_type == CHIP_FIJI) ||
4929                             (adev->asic_type == CHIP_STONEY) ||
4930                             (adev->asic_type == CHIP_POLARIS11) ||
4931                             (adev->asic_type == CHIP_POLARIS10)) {
4932                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4933                                        AMDGPU_DOORBELL_KIQ << 2);
4934                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4935                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4936                         }
4937                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4938                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4939                                             DOORBELL_OFFSET, ring->doorbell_index);
4940                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4941                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4942                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4943                         mqd->cp_hqd_pq_doorbell_control = tmp;
4944
4945                 } else {
4946                         mqd->cp_hqd_pq_doorbell_control = 0;
4947                 }
4948                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4949                        mqd->cp_hqd_pq_doorbell_control);
4950
4951                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4952                 ring->wptr = 0;
4953                 mqd->cp_hqd_pq_wptr = ring->wptr;
4954                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4955                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4956
4957                 /* set the vmid for the queue */
4958                 mqd->cp_hqd_vmid = 0;
4959                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4960
4961                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4962                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4963                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4964                 mqd->cp_hqd_persistent_state = tmp;
4965                 if (adev->asic_type == CHIP_STONEY ||
4966                         adev->asic_type == CHIP_POLARIS11 ||
4967                         adev->asic_type == CHIP_POLARIS10) {
4968                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4969                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4970                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4971                 }
4972
4973                 /* activate the queue */
4974                 mqd->cp_hqd_active = 1;
4975                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4976
4977                 vi_srbm_select(adev, 0, 0, 0, 0);
4978                 mutex_unlock(&adev->srbm_mutex);
4979
4980                 amdgpu_bo_kunmap(ring->mqd_obj);
4981                 amdgpu_bo_unreserve(ring->mqd_obj);
4982         }
4983
4984         if (use_doorbell) {
4985                 tmp = RREG32(mmCP_PQ_STATUS);
4986                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4987                 WREG32(mmCP_PQ_STATUS, tmp);
4988         }
4989
4990         gfx_v8_0_cp_compute_enable(adev, true);
4991
4992         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4993                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4994
4995                 ring->ready = true;
4996                 r = amdgpu_ring_test_ring(ring);
4997                 if (r)
4998                         ring->ready = false;
4999         }
5000
5001         return 0;
5002 }
5003
5004 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5005 {
5006         int r;
5007
5008         if (!(adev->flags & AMD_IS_APU))
5009                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5010
5011         if (!adev->pp_enabled) {
5012                 if (!adev->firmware.smu_load) {
5013                         /* legacy firmware loading */
5014                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5015                         if (r)
5016                                 return r;
5017
5018                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5019                         if (r)
5020                                 return r;
5021                 } else {
5022                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5023                                                         AMDGPU_UCODE_ID_CP_CE);
5024                         if (r)
5025                                 return -EINVAL;
5026
5027                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5028                                                         AMDGPU_UCODE_ID_CP_PFP);
5029                         if (r)
5030                                 return -EINVAL;
5031
5032                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5033                                                         AMDGPU_UCODE_ID_CP_ME);
5034                         if (r)
5035                                 return -EINVAL;
5036
5037                         if (adev->asic_type == CHIP_TOPAZ) {
5038                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5039                                 if (r)
5040                                         return r;
5041                         } else {
5042                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5043                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5044                                 if (r)
5045                                         return -EINVAL;
5046                         }
5047                 }
5048         }
5049
5050         r = gfx_v8_0_cp_gfx_resume(adev);
5051         if (r)
5052                 return r;
5053
5054         r = gfx_v8_0_cp_compute_resume(adev);
5055         if (r)
5056                 return r;
5057
5058         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5059
5060         return 0;
5061 }
5062
5063 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5064 {
5065         gfx_v8_0_cp_gfx_enable(adev, enable);
5066         gfx_v8_0_cp_compute_enable(adev, enable);
5067 }
5068
5069 static int gfx_v8_0_hw_init(void *handle)
5070 {
5071         int r;
5072         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5073
5074         gfx_v8_0_init_golden_registers(adev);
5075         gfx_v8_0_gpu_init(adev);
5076
5077         r = gfx_v8_0_rlc_resume(adev);
5078         if (r)
5079                 return r;
5080
5081         r = gfx_v8_0_cp_resume(adev);
5082
5083         return r;
5084 }
5085
5086 static int gfx_v8_0_hw_fini(void *handle)
5087 {
5088         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5089
5090         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5091         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5092         gfx_v8_0_cp_enable(adev, false);
5093         gfx_v8_0_rlc_stop(adev);
5094         gfx_v8_0_cp_compute_fini(adev);
5095
5096         amdgpu_set_powergating_state(adev,
5097                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5098
5099         return 0;
5100 }
5101
5102 static int gfx_v8_0_suspend(void *handle)
5103 {
5104         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5105
5106         return gfx_v8_0_hw_fini(adev);
5107 }
5108
5109 static int gfx_v8_0_resume(void *handle)
5110 {
5111         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112
5113         return gfx_v8_0_hw_init(adev);
5114 }
5115
5116 static bool gfx_v8_0_is_idle(void *handle)
5117 {
5118         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5119
5120         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5121                 return false;
5122         else
5123                 return true;
5124 }
5125
5126 static int gfx_v8_0_wait_for_idle(void *handle)
5127 {
5128         unsigned i;
5129         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5130
5131         for (i = 0; i < adev->usec_timeout; i++) {
5132                 if (gfx_v8_0_is_idle(handle))
5133                         return 0;
5134
5135                 udelay(1);
5136         }
5137         return -ETIMEDOUT;
5138 }
5139
5140 static int gfx_v8_0_check_soft_reset(void *handle)
5141 {
5142         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5143         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5144         u32 tmp;
5145
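             /*
              * Record which GRBM/SRBM blocks need a soft reset; the actual
              * reset is performed later by the pre/soft/post reset callbacks.
              */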
5146         /* GRBM_STATUS */
5147         tmp = RREG32(mmGRBM_STATUS);
5148         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5149                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5150                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5151                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5152                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5153                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5154                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5155                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5156                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5157                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5158                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5159                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5160                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5161         }
5162
5163         /* GRBM_STATUS2 */
5164         tmp = RREG32(mmGRBM_STATUS2);
5165         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5166                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5167                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5168
5169         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5170             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5171             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5172                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5173                                                 SOFT_RESET_CPF, 1);
5174                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5175                                                 SOFT_RESET_CPC, 1);
5176                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5177                                                 SOFT_RESET_CPG, 1);
5178                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5179                                                 SOFT_RESET_GRBM, 1);
5180         }
5181
5182         /* SRBM_STATUS */
5183         tmp = RREG32(mmSRBM_STATUS);
5184         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5185                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5186                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5187         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5188                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5189                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5190
5191         if (grbm_soft_reset || srbm_soft_reset) {
5192                 adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = true;
5193                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5194                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5195         } else {
5196                 adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = false;
5197                 adev->gfx.grbm_soft_reset = 0;
5198                 adev->gfx.srbm_soft_reset = 0;
5199         }
5200
5201         return 0;
5202 }
5203
5204 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5205                                   struct amdgpu_ring *ring)
5206 {
5207         int i;
5208
5209         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5210         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5211                 u32 tmp;
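                     /* request a dequeue and wait for the HQD to go inactive */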
5212                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5213                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5214                                     DEQUEUE_REQ, 2);
5215                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5216                 for (i = 0; i < adev->usec_timeout; i++) {
5217                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5218                                 break;
5219                         udelay(1);
5220                 }
5221         }
5222 }
5223
5224 static int gfx_v8_0_pre_soft_reset(void *handle)
5225 {
5226         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5227         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5228
5229         if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5230                 return 0;
5231
5232         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5233         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5234
5235         /* stop the rlc */
5236         gfx_v8_0_rlc_stop(adev);
5237
5238         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5239             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5240                 /* Disable GFX parsing/prefetching */
5241                 gfx_v8_0_cp_gfx_enable(adev, false);
5242
5243         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5244             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5245             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5246             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5247                 int i;
5248
5249                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5250                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5251
5252                         gfx_v8_0_inactive_hqd(adev, ring);
5253                 }
5254                 /* Disable MEC parsing/prefetching */
5255                 gfx_v8_0_cp_compute_enable(adev, false);
5256         }
5257
5258         return 0;
5259 }
5260
5261 static int gfx_v8_0_soft_reset(void *handle)
5262 {
5263         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5264         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5265         u32 tmp;
5266
5267         if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5268                 return 0;
5269
5270         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5271         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5272
5273         if (grbm_soft_reset || srbm_soft_reset) {
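                     /* stall and clear the GFX path in GMCON around the soft reset; the bits are cleared again once the reset completes */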
5274                 tmp = RREG32(mmGMCON_DEBUG);
5275                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5276                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5277                 WREG32(mmGMCON_DEBUG, tmp);
5278                 udelay(50);
5279         }
5280
5281         if (grbm_soft_reset) {
5282                 tmp = RREG32(mmGRBM_SOFT_RESET);
5283                 tmp |= grbm_soft_reset;
5284                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5285                 WREG32(mmGRBM_SOFT_RESET, tmp);
5286                 tmp = RREG32(mmGRBM_SOFT_RESET);
5287
5288                 udelay(50);
5289
5290                 tmp &= ~grbm_soft_reset;
5291                 WREG32(mmGRBM_SOFT_RESET, tmp);
5292                 tmp = RREG32(mmGRBM_SOFT_RESET);
5293         }
5294
5295         if (srbm_soft_reset) {
5296                 tmp = RREG32(mmSRBM_SOFT_RESET);
5297                 tmp |= srbm_soft_reset;
5298                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5299                 WREG32(mmSRBM_SOFT_RESET, tmp);
5300                 tmp = RREG32(mmSRBM_SOFT_RESET);
5301
5302                 udelay(50);
5303
5304                 tmp &= ~srbm_soft_reset;
5305                 WREG32(mmSRBM_SOFT_RESET, tmp);
5306                 tmp = RREG32(mmSRBM_SOFT_RESET);
5307         }
5308
5309         if (grbm_soft_reset || srbm_soft_reset) {
5310                 tmp = RREG32(mmGMCON_DEBUG);
5311                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5312                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5313                 WREG32(mmGMCON_DEBUG, tmp);
5314         }
5315
5316         /* Wait a little for things to settle down */
5317         udelay(50);
5318
5319         return 0;
5320 }
5321
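     /* Reset the HQD's dequeue request and ring pointers for this queue. */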
5322 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5323                               struct amdgpu_ring *ring)
5324 {
5325         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5326         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5327         WREG32(mmCP_HQD_PQ_RPTR, 0);
5328         WREG32(mmCP_HQD_PQ_WPTR, 0);
5329         vi_srbm_select(adev, 0, 0, 0, 0);
5330 }
5331
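     /*
      * Post-reset restore: bring the gfx and/or compute CP back up depending
      * on which reset bits were used, re-init the affected HQDs and restart
      * the RLC.
      */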
5332 static int gfx_v8_0_post_soft_reset(void *handle)
5333 {
5334         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5335         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5336
5337         if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
5338                 return 0;
5339
5340         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5341         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5342
5343         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5344             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5345                 gfx_v8_0_cp_gfx_resume(adev);
5346
5347         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5348             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5349             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5350             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5351                 int i;
5352
5353                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5354                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5355
5356                         gfx_v8_0_init_hqd(adev, ring);
5357                 }
5358                 gfx_v8_0_cp_compute_resume(adev);
5359         }
5360         gfx_v8_0_rlc_start(adev);
5361
5362         return 0;
5363 }
5364
5365 /**
5366  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5367  *
5368  * @adev: amdgpu_device pointer
5369  *
5370  * Fetches a GPU clock counter snapshot.
5371  * Returns the 64-bit clock counter snapshot.
5372  */
5373 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5374 {
5375         uint64_t clock;
5376
5377         mutex_lock(&adev->gfx.gpu_clock_mutex);
5378         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5379         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5380                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5381         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5382         return clock;
5383 }
5384
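     /*
      * Program the per-VMID GDS base/size, GWS and OA allocations with a
      * series of WRITE_DATA packets on the ring.
      */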
5385 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5386                                           uint32_t vmid,
5387                                           uint32_t gds_base, uint32_t gds_size,
5388                                           uint32_t gws_base, uint32_t gws_size,
5389                                           uint32_t oa_base, uint32_t oa_size)
5390 {
5391         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5392         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5393
5394         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5395         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5396
5397         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5398         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5399
5400         /* GDS Base */
5401         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5402         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5403                                 WRITE_DATA_DST_SEL(0)));
5404         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5405         amdgpu_ring_write(ring, 0);
5406         amdgpu_ring_write(ring, gds_base);
5407
5408         /* GDS Size */
5409         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5410         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5411                                 WRITE_DATA_DST_SEL(0)));
5412         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5413         amdgpu_ring_write(ring, 0);
5414         amdgpu_ring_write(ring, gds_size);
5415
5416         /* GWS */
5417         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5418         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5419                                 WRITE_DATA_DST_SEL(0)));
5420         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5421         amdgpu_ring_write(ring, 0);
5422         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5423
5424         /* OA */
5425         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5426         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5427                                 WRITE_DATA_DST_SEL(0)));
5428         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5429         amdgpu_ring_write(ring, 0);
5430         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5431 }
5432
5433 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5434         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5435         .select_se_sh = &gfx_v8_0_select_se_sh,
5436 };
5437
5438 static int gfx_v8_0_early_init(void *handle)
5439 {
5440         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5441
5442         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5443         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5444         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5445         gfx_v8_0_set_ring_funcs(adev);
5446         gfx_v8_0_set_irq_funcs(adev);
5447         gfx_v8_0_set_gds_init(adev);
5448         gfx_v8_0_set_rlc_funcs(adev);
5449
5450         return 0;
5451 }
5452
5453 static int gfx_v8_0_late_init(void *handle)
5454 {
5455         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5456         int r;
5457
5458         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5459         if (r)
5460                 return r;
5461
5462         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5463         if (r)
5464                 return r;
5465
5466         /* requires IBs so do in late init after IB pool is initialized */
5467         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5468         if (r)
5469                 return r;
5470
5471         amdgpu_set_powergating_state(adev,
5472                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5473
5474         return 0;
5475 }
5476
5477 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5478                                                        bool enable)
5479 {
5480         if (adev->asic_type == CHIP_POLARIS11)
5481                 /* Send msg to SMU via Powerplay */
5482                 amdgpu_set_powergating_state(adev,
5483                                              AMD_IP_BLOCK_TYPE_SMC,
5484                                              enable ?
5485                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5486
5487         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5488 }
5489
5490 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5491                                                         bool enable)
5492 {
5493         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5494 }
5495
5496 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5497                 bool enable)
5498 {
5499         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5500 }
5501
5502 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5503                                           bool enable)
5504 {
5505         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5506 }
5507
5508 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5509                                                 bool enable)
5510 {
5511         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5512
5513         /* Read any GFX register to wake up GFX. */
5514         if (!enable)
5515                 RREG32(mmDB_RENDER_CONTROL);
5516 }
5517
5518 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5519                                           bool enable)
5520 {
5521         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5522                 cz_enable_gfx_cg_power_gating(adev, true);
5523                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5524                         cz_enable_gfx_pipeline_power_gating(adev, true);
5525         } else {
5526                 cz_enable_gfx_cg_power_gating(adev, false);
5527                 cz_enable_gfx_pipeline_power_gating(adev, false);
5528         }
5529 }
5530
5531 static int gfx_v8_0_set_powergating_state(void *handle,
5532                                           enum amd_powergating_state state)
5533 {
5534         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5535         bool enable = (state == AMD_PG_STATE_GATE);
5536
5537         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5538                 return 0;
5539
5540         switch (adev->asic_type) {
5541         case CHIP_CARRIZO:
5542         case CHIP_STONEY:
5543                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5544                         cz_update_gfx_cg_power_gating(adev, enable);
5545
5546                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5547                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5548                 else
5549                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5550
5551                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5552                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5553                 else
5554                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5555                 break;
5556         case CHIP_POLARIS11:
5557                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5558                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5559                 else
5560                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5561
5562                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5563                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5564                 else
5565                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5566
5567                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5568                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5569                 else
5570                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5571                 break;
5572         default:
5573                 break;
5574         }
5575
5576         return 0;
5577 }
5578
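     /*
      * Broadcast a BPM serdes command (set/clear) to all SEs/SHs and CU
      * masters through RLC_SERDES_WR_CTRL. On Stoney the BPM_DATA and
      * REG_ADDR fields are not cleared before the new command is OR-ed in.
      */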
5579 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5580                                      uint32_t reg_addr, uint32_t cmd)
5581 {
5582         uint32_t data;
5583
5584         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5585
5586         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5587         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5588
5589         data = RREG32(mmRLC_SERDES_WR_CTRL);
5590         if (adev->asic_type == CHIP_STONEY)
5591                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5592                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5593                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5594                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5595                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5596                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5597                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5598                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5599                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5600         else
5601                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5602                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5603                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5604                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5605                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5606                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5607                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5608                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5609                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5610                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5611                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5612         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5613                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5614                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5615                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5616
5617         WREG32(mmRLC_SERDES_WR_CTRL, data);
5618 }
5619
5620 #define MSG_ENTER_RLC_SAFE_MODE     1
5621 #define MSG_EXIT_RLC_SAFE_MODE      0
5622 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5623 #define RLC_GPR_REG2__REQ__SHIFT 0
5624 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5625 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5626
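     /*
      * CZ/ST safe-mode entry: if any GFX clock- or power-gating feature is
      * active, request safe mode through the RLC_GPR_REG2 handshake and wait
      * for the GFX clock/power status bits plus the REQ acknowledge.
      */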
5627 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5628 {
5629         u32 data = 0;
5630         unsigned i;
5631
5632         data = RREG32(mmRLC_CNTL);
5633         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5634                 return;
5635
5636         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5637             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5638                                AMD_PG_SUPPORT_GFX_DMG))) {
5639                 data |= RLC_GPR_REG2__REQ_MASK;
5640                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5641                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5642                 WREG32(mmRLC_GPR_REG2, data);
5643
5644                 for (i = 0; i < adev->usec_timeout; i++) {
5645                         if ((RREG32(mmRLC_GPM_STAT) &
5646                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5647                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5648                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5649                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5650                                 break;
5651                         udelay(1);
5652                 }
5653
5654                 for (i = 0; i < adev->usec_timeout; i++) {
5655                         if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5656                                 break;
5657                         udelay(1);
5658                 }
5659                 adev->gfx.rlc.in_safe_mode = true;
5660         }
5661 }
5662
5663 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5664 {
5665         u32 data;
5666         unsigned i;
5667
5668         data = RREG32(mmRLC_CNTL);
5669         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5670                 return;
5671
5672         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5673             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5674                                AMD_PG_SUPPORT_GFX_DMG))) {
5675                 data |= RLC_GPR_REG2__REQ_MASK;
5676                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5677                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5678                 WREG32(mmRLC_GPR_REG2, data);
5679                 adev->gfx.rlc.in_safe_mode = false;
5680         }
5681
5682         for (i = 0; i < adev->usec_timeout; i++) {
5683                 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5684                         break;
5685                 udelay(1);
5686         }
5687 }
5688
5689 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5690 {
5691         u32 data;
5692         unsigned i;
5693
5694         data = RREG32(mmRLC_CNTL);
5695         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5696                 return;
5697
5698         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5699                 data |= RLC_SAFE_MODE__CMD_MASK;
5700                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5701                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5702                 WREG32(mmRLC_SAFE_MODE, data);
5703
5704                 for (i = 0; i < adev->usec_timeout; i++) {
5705                         if ((RREG32(mmRLC_GPM_STAT) &
5706                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5707                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5708                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5709                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5710                                 break;
5711                         udelay(1);
5712                 }
5713
5714                 for (i = 0; i < adev->usec_timeout; i++) {
5715                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5716                                 break;
5717                         udelay(1);
5718                 }
5719                 adev->gfx.rlc.in_safe_mode = true;
5720         }
5721 }
5722
5723 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5724 {
5725         u32 data = 0;
5726         unsigned i;
5727
5728         data = RREG32(mmRLC_CNTL);
5729         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5730                 return;
5731
5732         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5733                 if (adev->gfx.rlc.in_safe_mode) {
5734                         data |= RLC_SAFE_MODE__CMD_MASK;
5735                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5736                         WREG32(mmRLC_SAFE_MODE, data);
5737                         adev->gfx.rlc.in_safe_mode = false;
5738                 }
5739         }
5740
5741         for (i = 0; i < adev->usec_timeout; i++) {
5742                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5743                         break;
5744                 udelay(1);
5745         }
5746 }
5747
5748 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5749 {
5750         adev->gfx.rlc.in_safe_mode = true;
5751 }
5752
5753 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5754 {
5755         adev->gfx.rlc.in_safe_mode = false;
5756 }
5757
5758 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5759         .enter_safe_mode = cz_enter_rlc_safe_mode,
5760         .exit_safe_mode = cz_exit_rlc_safe_mode
5761 };
5762
5763 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5764         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5765         .exit_safe_mode = iceland_exit_rlc_safe_mode
5766 };
5767
5768 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5769         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5770         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5771 };
5772
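     /*
      * Enable/disable medium grain clock gating (MGCG), memory light sleep
      * (MGLS) and CGTS for the shader array. All register updates are done
      * under RLC safe mode and bracketed by serdes idle waits.
      */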
5773 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5774                                                       bool enable)
5775 {
5776         uint32_t temp, data;
5777
5778         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5779
5780         /* It is disabled by HW by default */
5781         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5782                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5783                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5784                                 /* 1 - RLC memory Light sleep */
5785                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5786
5787                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5788                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5789                 }
5790
5791                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5792                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5793                 if (adev->flags & AMD_IS_APU)
5794                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5795                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5796                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5797                 else
5798                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5799                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5800                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5801                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5802
5803                 if (temp != data)
5804                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5805
5806                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5807                 gfx_v8_0_wait_for_rlc_serdes(adev);
5808
5809                 /* 5 - clear mgcg override */
5810                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5811
5812                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5813                         /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5814                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5815                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5816                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5817                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5818                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5819                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5820                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5821                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5822                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5823                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5824                         if (temp != data)
5825                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5826                 }
5827                 udelay(50);
5828
5829                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5830                 gfx_v8_0_wait_for_rlc_serdes(adev);
5831         } else {
5832                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5833                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5834                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5835                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5836                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5837                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5838                 if (temp != data)
5839                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5840
5841                 /* 2 - disable MGLS in RLC */
5842                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5843                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5844                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5845                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5846                 }
5847
5848                 /* 3 - disable MGLS in CP */
5849                 data = RREG32(mmCP_MEM_SLP_CNTL);
5850                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5851                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5852                         WREG32(mmCP_MEM_SLP_CNTL, data);
5853                 }
5854
5855                 /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
5856                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5857                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5858                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5859                 if (temp != data)
5860                         WREG32(mmCGTS_SM_CTRL_REG, data);
5861
5862                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5863                 gfx_v8_0_wait_for_rlc_serdes(adev);
5864
5865                 /* 6 - set mgcg override */
5866                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5867
5868                 udelay(50);
5869
5870                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5871                 gfx_v8_0_wait_for_rlc_serdes(adev);
5872         }
5873
5874         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5875 }
5876
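     /*
      * Enable/disable coarse grain clock gating (CGCG) and CGLS. The serdes
      * override commands and the RLC_CGCG_CGLS_CTRL enables are sequenced
      * under RLC safe mode, mirroring the MGCG path above.
      */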
5877 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5878                                                       bool enable)
5879 {
5880         uint32_t temp, temp1, data, data1;
5881
5882         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5883
5884         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5885
5886         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5887                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5888                  * Cmp_busy/GFX_Idle interrupts
5889                  */
5890                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5891
5892                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5893                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5894                 if (temp1 != data1)
5895                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5896
5897                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5898                 gfx_v8_0_wait_for_rlc_serdes(adev);
5899
5900                 /* 3 - clear cgcg override */
5901                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5902
5903                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5904                 gfx_v8_0_wait_for_rlc_serdes(adev);
5905
5906                 /* 4 - write cmd to set CGLS */
5907                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5908
5909                 /* 5 - enable cgcg */
5910                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5911
5912                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5913                         /* enable cgls*/
5914                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5915
5916                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5917                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5918
5919                         if (temp1 != data1)
5920                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5921                 } else {
5922                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5923                 }
5924
5925                 if (temp != data)
5926                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5927         } else {
5928                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5929                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5930
5931                 /* TEST CGCG */
5932                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5933                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5934                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5935                 if (temp1 != data1)
5936                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5937
5938                 /* read gfx register to wake up cgcg */
5939                 RREG32(mmCB_CGTT_SCLK_CTRL);
5940                 RREG32(mmCB_CGTT_SCLK_CTRL);
5941                 RREG32(mmCB_CGTT_SCLK_CTRL);
5942                 RREG32(mmCB_CGTT_SCLK_CTRL);
5943
5944                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5945                 gfx_v8_0_wait_for_rlc_serdes(adev);
5946
5947                 /* write cmd to Set CGCG Override */
5948                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5949
5950                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5951                 gfx_v8_0_wait_for_rlc_serdes(adev);
5952
5953                 /* write cmd to Clear CGLS */
5954                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5955
5956                 /* disable cgcg, cgls should be disabled too. */
5957                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5958                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5959                 if (temp != data)
5960                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5961         }
5962
5963         gfx_v8_0_wait_for_rlc_serdes(adev);
5964
5965         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5966 }
5967 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5968                                             bool enable)
5969 {
5970         if (enable) {
5971                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5972                  * ===  MGCG + MGLS + TS(CG/LS) ===
5973                  */
5974                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5975                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5976         } else {
5977                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5978                  * ===  CGCG + CGLS ===
5979                  */
5980                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5981                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5982         }
5983         return 0;
5984 }
5985
5986 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5987                                           enum amd_clockgating_state state)
5988 {
5989         uint32_t msg_id, pp_state;
5990         void *pp_handle = adev->powerplay.pp_handle;
5991
5992         if (state == AMD_CG_STATE_UNGATE)
5993                 pp_state = 0;
5994         else
5995                 pp_state = PP_STATE_CG | PP_STATE_LS;
5996
5997         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5998                         PP_BLOCK_GFX_CG,
5999                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6000                         pp_state);
6001         amd_set_clockgating_by_smu(pp_handle, msg_id);
6002
6003         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6004                         PP_BLOCK_GFX_MG,
6005                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6006                         pp_state);
6007         amd_set_clockgating_by_smu(pp_handle, msg_id);
6008
6009         return 0;
6010 }
6011
6012 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6013                                           enum amd_clockgating_state state)
6014 {
6015         uint32_t msg_id, pp_state;
6016         void *pp_handle = adev->powerplay.pp_handle;
6017
6018         if (state == AMD_CG_STATE_UNGATE)
6019                 pp_state = 0;
6020         else
6021                 pp_state = PP_STATE_CG | PP_STATE_LS;
6022
6023         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6024                         PP_BLOCK_GFX_CG,
6025                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6026                         pp_state);
6027         amd_set_clockgating_by_smu(pp_handle, msg_id);
6028
6029         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6030                         PP_BLOCK_GFX_3D,
6031                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6032                         pp_state);
6033         amd_set_clockgating_by_smu(pp_handle, msg_id);
6034
6035         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6036                         PP_BLOCK_GFX_MG,
6037                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6038                         pp_state);
6039         amd_set_clockgating_by_smu(pp_handle, msg_id);
6040
6041         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6042                         PP_BLOCK_GFX_RLC,
6043                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6044                         pp_state);
6045         amd_set_clockgating_by_smu(pp_handle, msg_id);
6046
6047         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6048                         PP_BLOCK_GFX_CP,
6049                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6050                         pp_state);
6051         amd_set_clockgating_by_smu(pp_handle, msg_id);
6052
6053         return 0;
6054 }
6055
6056 static int gfx_v8_0_set_clockgating_state(void *handle,
6057                                           enum amd_clockgating_state state)
6058 {
6059         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6060
6061         switch (adev->asic_type) {
6062         case CHIP_FIJI:
6063         case CHIP_CARRIZO:
6064         case CHIP_STONEY:
6065                 gfx_v8_0_update_gfx_clock_gating(adev,
6066                                                  state == AMD_CG_STATE_GATE);
6067                 break;
6068         case CHIP_TONGA:
6069                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6070                 break;
6071         case CHIP_POLARIS10:
6072         case CHIP_POLARIS11:
6073                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6074                 break;
6075         default:
6076                 break;
6077         }
6078         return 0;
6079 }
6080
6081 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6082 {
6083         return ring->adev->wb.wb[ring->rptr_offs];
6084 }
6085
6086 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6087 {
6088         struct amdgpu_device *adev = ring->adev;
6089
6090         if (ring->use_doorbell)
6091                 /* XXX check if swapping is necessary on BE */
6092                 return ring->adev->wb.wb[ring->wptr_offs];
6093         else
6094                 return RREG32(mmCP_RB0_WPTR);
6095 }
6096
6097 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6098 {
6099         struct amdgpu_device *adev = ring->adev;
6100
6101         if (ring->use_doorbell) {
6102                 /* XXX check if swapping is necessary on BE */
6103                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6104                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6105         } else {
6106                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6107                 (void)RREG32(mmCP_RB0_WPTR);
6108         }
6109 }
6110
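     /*
      * Emit an HDP flush: a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ
      * and polls GPU_HDP_FLUSH_DONE for the bit that corresponds to this CP
      * (the gfx ring uses CP0, compute rings map their me/pipe onto CP2..CP9).
      */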
6111 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6112 {
6113         u32 ref_and_mask, reg_mem_engine;
6114
6115         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6116                 switch (ring->me) {
6117                 case 1:
6118                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6119                         break;
6120                 case 2:
6121                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6122                         break;
6123                 default:
6124                         return;
6125                 }
6126                 reg_mem_engine = 0;
6127         } else {
6128                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6129                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6130         }
6131
6132         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6133         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6134                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6135                                  reg_mem_engine));
6136         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6137         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6138         amdgpu_ring_write(ring, ref_and_mask);
6139         amdgpu_ring_write(ring, ref_and_mask);
6140         amdgpu_ring_write(ring, 0x20); /* poll interval */
6141 }
6142
6143 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6144 {
6145         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6146         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6147                                  WRITE_DATA_DST_SEL(0) |
6148                                  WR_CONFIRM));
6149         amdgpu_ring_write(ring, mmHDP_DEBUG0);
6150         amdgpu_ring_write(ring, 0);
6151         amdgpu_ring_write(ring, 1);
6152
6153 }
6154
6155 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6156                                       struct amdgpu_ib *ib,
6157                                       unsigned vm_id, bool ctx_switch)
6158 {
6159         u32 header, control = 0;
6160
6161         if (ib->flags & AMDGPU_IB_FLAG_CE)
6162                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6163         else
6164                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6165
6166         control |= ib->length_dw | (vm_id << 24);
6167
6168         amdgpu_ring_write(ring, header);
6169         amdgpu_ring_write(ring,
6170 #ifdef __BIG_ENDIAN
6171                           (2 << 0) |
6172 #endif
6173                           (ib->gpu_addr & 0xFFFFFFFC));
6174         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6175         amdgpu_ring_write(ring, control);
6176 }
6177
6178 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6179                                           struct amdgpu_ib *ib,
6180                                           unsigned vm_id, bool ctx_switch)
6181 {
6182         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6183
6184         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6185         amdgpu_ring_write(ring,
6186 #ifdef __BIG_ENDIAN
6187                                 (2 << 0) |
6188 #endif
6189                                 (ib->gpu_addr & 0xFFFFFFFC));
6190         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6191         amdgpu_ring_write(ring, control);
6192 }
6193
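     /*
      * Emit a gfx fence: EVENT_WRITE_EOP flushes the TC/TCL1 caches, writes
      * the 32- or 64-bit sequence number to addr and optionally raises an
      * interrupt, depending on flags.
      */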
6194 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6195                                          u64 seq, unsigned flags)
6196 {
6197         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6198         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6199
6200         /* EVENT_WRITE_EOP - flush caches, send int */
6201         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6202         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6203                                  EOP_TC_ACTION_EN |
6204                                  EOP_TC_WB_ACTION_EN |
6205                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6206                                  EVENT_INDEX(5)));
6207         amdgpu_ring_write(ring, addr & 0xfffffffc);
6208         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6209                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6210         amdgpu_ring_write(ring, lower_32_bits(seq));
6211         amdgpu_ring_write(ring, upper_32_bits(seq));
6212
6213 }
6214
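     /*
      * Emit a pipeline sync: WAIT_REG_MEM on the fence address until it
      * reaches the latest sync sequence number, using the PFP on gfx rings
      * and the ME on compute rings.
      */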
6215 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6216 {
6217         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6218         uint32_t seq = ring->fence_drv.sync_seq;
6219         uint64_t addr = ring->fence_drv.gpu_addr;
6220
6221         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6222         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6223                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6224                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6225         amdgpu_ring_write(ring, addr & 0xfffffffc);
6226         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6227         amdgpu_ring_write(ring, seq);
6228         amdgpu_ring_write(ring, 0xffffffff);
6229         amdgpu_ring_write(ring, 4); /* poll interval */
6230 }
6231
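     /*
      * Emit a VM flush: write the new page table base for vm_id (contexts
      * 0-7 and 8-15 live in different register ranges), kick
      * VM_INVALIDATE_REQUEST and wait for the invalidate to complete, then
      * resync the PFP with the ME on gfx rings.
      */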
6232 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6233                                         unsigned vm_id, uint64_t pd_addr)
6234 {
6235         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6236
6237         /* GFX8 emits 128 dw of NOPs so the DE does not vm_flush before the CE finishes the CEIB */
6238         if (usepfp)
6239                 amdgpu_ring_insert_nop(ring, 128);
6240
6241         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6242         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6243                                  WRITE_DATA_DST_SEL(0)) |
6244                                  WR_CONFIRM);
6245         if (vm_id < 8) {
6246                 amdgpu_ring_write(ring,
6247                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6248         } else {
6249                 amdgpu_ring_write(ring,
6250                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6251         }
6252         amdgpu_ring_write(ring, 0);
6253         amdgpu_ring_write(ring, pd_addr >> 12);
6254
6255         /* bits 0-15 are the VM contexts0-15 */
6256         /* invalidate the cache */
6257         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6258         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6259                                  WRITE_DATA_DST_SEL(0)));
6260         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6261         amdgpu_ring_write(ring, 0);
6262         amdgpu_ring_write(ring, 1 << vm_id);
6263
6264         /* wait for the invalidate to complete */
6265         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6266         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6267                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6268                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6269         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6270         amdgpu_ring_write(ring, 0);
6271         amdgpu_ring_write(ring, 0); /* ref */
6272         amdgpu_ring_write(ring, 0); /* mask */
6273         amdgpu_ring_write(ring, 0x20); /* poll interval */
6274
6275         /* compute doesn't have PFP */
6276         if (usepfp) {
6277                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6278                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6279                 amdgpu_ring_write(ring, 0x0);
6280                 /* GFX8 emits 128 dw of NOPs so the CE does not access the VM before vm_flush finishes */
6281                 amdgpu_ring_insert_nop(ring, 128);
6282         }
6283 }
6284
6285 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6286 {
6287         return ring->adev->wb.wb[ring->wptr_offs];
6288 }
6289
6290 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6291 {
6292         struct amdgpu_device *adev = ring->adev;
6293
6294         /* XXX check if swapping is necessary on BE */
6295         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6296         WDOORBELL32(ring->doorbell_index, ring->wptr);
6297 }
6298
6299 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6300                                              u64 addr, u64 seq,
6301                                              unsigned flags)
6302 {
6303         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6304         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6305
6306         /* RELEASE_MEM - flush caches, send int */
6307         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6308         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6309                                  EOP_TC_ACTION_EN |
6310                                  EOP_TC_WB_ACTION_EN |
6311                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6312                                  EVENT_INDEX(5)));
6313         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6314         amdgpu_ring_write(ring, addr & 0xfffffffc);
6315         amdgpu_ring_write(ring, upper_32_bits(addr));
6316         amdgpu_ring_write(ring, lower_32_bits(seq));
6317         amdgpu_ring_write(ring, upper_32_bits(seq));
6318 }
6319
6320 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6321 {
6322         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6323         amdgpu_ring_write(ring, 0);
6324 }
6325
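     /*
      * Emit a CONTEXT_CONTROL packet. The load masks in dw2 select which
      * state blocks (global config, CS/SH regs, per-context state, CE RAM)
      * the CP reloads on a context switch or first-time preamble.
      */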
6326 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6327 {
6328         uint32_t dw2 = 0;
6329
6330         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
6331         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6332                 /* set load_global_config & load_global_uconfig */
6333                 dw2 |= 0x8001;
6334                 /* set load_cs_sh_regs */
6335                 dw2 |= 0x01000000;
6336                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6337                 dw2 |= 0x10002;
6338
6339                 /* set load_ce_ram if preamble presented */
6340                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6341                         dw2 |= 0x10000000;
6342         } else {
6343                 /* still load_ce_ram if this is the first time the preamble is
6344                  * presented, even though no context switch happens.
6345                  */
6346                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6347                         dw2 |= 0x10000000;
6348         }
6349
6350         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6351         amdgpu_ring_write(ring, dw2);
6352         amdgpu_ring_write(ring, 0);
6353 }
6354
6355 static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
6356 {
6357         return
6358                 4; /* gfx_v8_0_ring_emit_ib_gfx */
6359 }
6360
6361 static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
6362 {
6363         return
6364                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6365                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6366                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6367                 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6368                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6369                 256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6370                 2 + /* gfx_v8_ring_emit_sb */
6371                 3; /* gfx_v8_ring_emit_cntxcntl */
6372 }
6373
6374 static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
6375 {
6376         return
6377                 4; /* gfx_v8_0_ring_emit_ib_compute */
6378 }
6379
6380 static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
6381 {
6382         return
6383                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6384                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6385                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6386                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6387                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6388                 7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6389 }
6390
6391 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6392                                                  enum amdgpu_interrupt_state state)
6393 {
6394         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6395                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6396 }
6397
6398 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6399                                                      int me, int pipe,
6400                                                      enum amdgpu_interrupt_state state)
6401 {
6402         /*
6403          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6404          * handles the setting of interrupts for this specific pipe. All other
6405          * pipes' interrupts are set by amdkfd.
6406          */
6407
6408         if (me == 1) {
6409                 switch (pipe) {
6410                 case 0:
6411                         break;
6412                 default:
6413                         DRM_DEBUG("invalid pipe %d\n", pipe);
6414                         return;
6415                 }
6416         } else {
6417                 DRM_DEBUG("invalid me %d\n", me);
6418                 return;
6419         }
6420
6421         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6422                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6423 }
6424
6425 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6426                                              struct amdgpu_irq_src *source,
6427                                              unsigned type,
6428                                              enum amdgpu_interrupt_state state)
6429 {
6430         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6431                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6432
6433         return 0;
6434 }
6435
6436 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6437                                               struct amdgpu_irq_src *source,
6438                                               unsigned type,
6439                                               enum amdgpu_interrupt_state state)
6440 {
6441         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6442                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6443
6444         return 0;
6445 }
6446
6447 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6448                                             struct amdgpu_irq_src *src,
6449                                             unsigned type,
6450                                             enum amdgpu_interrupt_state state)
6451 {
6452         switch (type) {
6453         case AMDGPU_CP_IRQ_GFX_EOP:
6454                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6455                 break;
6456         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6457                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6458                 break;
6459         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6460                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6461                 break;
6462         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6463                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6464                 break;
6465         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6466                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6467                 break;
6468         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6469                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6470                 break;
6471         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6472                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6473                 break;
6474         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6475                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6476                 break;
6477         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6478                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6479                 break;
6480         default:
6481                 break;
6482         }
6483         return 0;
6484 }
6485
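     /*
      * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
      * run fence processing on the matching gfx or compute ring.
      */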
6486 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6487                             struct amdgpu_irq_src *source,
6488                             struct amdgpu_iv_entry *entry)
6489 {
6490         int i;
6491         u8 me_id, pipe_id, queue_id;
6492         struct amdgpu_ring *ring;
6493
6494         DRM_DEBUG("IH: CP EOP\n");
6495         me_id = (entry->ring_id & 0x0c) >> 2;
6496         pipe_id = (entry->ring_id & 0x03) >> 0;
6497         queue_id = (entry->ring_id & 0x70) >> 4;
6498
6499         switch (me_id) {
6500         case 0:
6501                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6502                 break;
6503         case 1:
6504         case 2:
6505                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6506                         ring = &adev->gfx.compute_ring[i];
6507                         /* Per-queue interrupt is supported for MEC starting from VI.
6508                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6509                          */
6510                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6511                                 amdgpu_fence_process(ring);
6512                 }
6513                 break;
6514         }
6515         return 0;
6516 }
6517
6518 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6519                                  struct amdgpu_irq_src *source,
6520                                  struct amdgpu_iv_entry *entry)
6521 {
6522         DRM_ERROR("Illegal register access in command stream\n");
6523         schedule_work(&adev->reset_work);
6524         return 0;
6525 }
6526
6527 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6528                                   struct amdgpu_irq_src *source,
6529                                   struct amdgpu_iv_entry *entry)
6530 {
6531         DRM_ERROR("Illegal instruction in command stream\n");
6532         schedule_work(&adev->reset_work);
6533         return 0;
6534 }
6535
6536 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6537         .name = "gfx_v8_0",
6538         .early_init = gfx_v8_0_early_init,
6539         .late_init = gfx_v8_0_late_init,
6540         .sw_init = gfx_v8_0_sw_init,
6541         .sw_fini = gfx_v8_0_sw_fini,
6542         .hw_init = gfx_v8_0_hw_init,
6543         .hw_fini = gfx_v8_0_hw_fini,
6544         .suspend = gfx_v8_0_suspend,
6545         .resume = gfx_v8_0_resume,
6546         .is_idle = gfx_v8_0_is_idle,
6547         .wait_for_idle = gfx_v8_0_wait_for_idle,
6548         .check_soft_reset = gfx_v8_0_check_soft_reset,
6549         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6550         .soft_reset = gfx_v8_0_soft_reset,
6551         .post_soft_reset = gfx_v8_0_post_soft_reset,
6552         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6553         .set_powergating_state = gfx_v8_0_set_powergating_state,
6554 };
6555
6556 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6557         .get_rptr = gfx_v8_0_ring_get_rptr,
6558         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6559         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6560         .parse_cs = NULL,
6561         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6562         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6563         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6564         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6565         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6566         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6567         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6568         .test_ring = gfx_v8_0_ring_test_ring,
6569         .test_ib = gfx_v8_0_ring_test_ib,
6570         .insert_nop = amdgpu_ring_insert_nop,
6571         .pad_ib = amdgpu_ring_generic_pad_ib,
6572         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6573         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6574         .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
6575         .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
6576 };
6577
6578 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6579         .get_rptr = gfx_v8_0_ring_get_rptr,
6580         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6581         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6582         .parse_cs = NULL,
6583         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6584         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6585         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6586         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6587         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6588         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6589         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6590         .test_ring = gfx_v8_0_ring_test_ring,
6591         .test_ib = gfx_v8_0_ring_test_ib,
6592         .insert_nop = amdgpu_ring_insert_nop,
6593         .pad_ib = amdgpu_ring_generic_pad_ib,
6594         .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
6595         .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
6596 };
6597
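     /* Attach the ring callback tables above to every gfx and compute ring. */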
6598 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6599 {
6600         int i;
6601
6602         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6603                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6604
6605         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6606                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6607 }
6608
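     /* Interrupt source tables pairing each state setter with its handler. */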
6609 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6610         .set = gfx_v8_0_set_eop_interrupt_state,
6611         .process = gfx_v8_0_eop_irq,
6612 };
6613
6614 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6615         .set = gfx_v8_0_set_priv_reg_fault_state,
6616         .process = gfx_v8_0_priv_reg_irq,
6617 };
6618
6619 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6620         .set = gfx_v8_0_set_priv_inst_fault_state,
6621         .process = gfx_v8_0_priv_inst_irq,
6622 };
6623
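     /* Hook up the EOP (fence), privileged-register-fault and
      * privileged-instruction-fault interrupt sources to the handlers above.
      */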
6624 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6625 {
6626         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6627         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6628
6629         adev->gfx.priv_reg_irq.num_types = 1;
6630         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6631
6632         adev->gfx.priv_inst_irq.num_types = 1;
6633         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6634 }
6635
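     /* Select the ASIC-specific RLC callback table; ASICs without a dedicated
      * implementation fall back to the no-op table.
      */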
6636 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6637 {
6638         switch (adev->asic_type) {
6639         case CHIP_TOPAZ:
6640                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6641                 break;
6642         case CHIP_STONEY:
6643         case CHIP_CARRIZO:
6644                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6645                 break;
6646         default:
6647                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6648                 break;
6649         }
6650 }
6651
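     /* Initialize the Global Data Share (GDS) pools: total GDS memory is read
      * from GDS_VMID0_SIZE, the GWS and OA pool sizes are fixed, and each pool
      * is split into gfx and CS partitions sized according to the total GDS
      * memory found.
      */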
6652 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6653 {
6654         /* init asic gds info */
6655         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6656         adev->gds.gws.total_size = 64;
6657         adev->gds.oa.total_size = 16;
6658
6659         if (adev->gds.mem.total_size == 64 * 1024) {
6660                 adev->gds.mem.gfx_partition_size = 4096;
6661                 adev->gds.mem.cs_partition_size = 4096;
6662
6663                 adev->gds.gws.gfx_partition_size = 4;
6664                 adev->gds.gws.cs_partition_size = 4;
6665
6666                 adev->gds.oa.gfx_partition_size = 4;
6667                 adev->gds.oa.cs_partition_size = 1;
6668         } else {
6669                 adev->gds.mem.gfx_partition_size = 1024;
6670                 adev->gds.mem.cs_partition_size = 1024;
6671
6672                 adev->gds.gws.gfx_partition_size = 16;
6673                 adev->gds.gws.cs_partition_size = 16;
6674
6675                 adev->gds.oa.gfx_partition_size = 4;
6676                 adev->gds.oa.cs_partition_size = 4;
6677         }
6678 }
6679
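     /* Program the requested inactive-CU mask for the currently selected SE/SH
      * into GC_USER_SHADER_ARRAY_CONFIG; an empty bitmap leaves the register
      * untouched.
      */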
6680 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6681                                                  u32 bitmap)
6682 {
6683         u32 data;
6684
6685         if (!bitmap)
6686                 return;
6687
6688         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6689         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6690
6691         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6692 }
6693
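     /* Return the bitmap of active CUs in the currently selected SE/SH by
      * combining the hardware (CC_) and user (GC_USER_) inactive-CU fields,
      * inverting the result and masking it to max_cu_per_sh.
      */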
6694 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6695 {
6696         u32 data, mask;
6697
6698         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6699                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6700
6701         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6702
6703         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6704 }
6705
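     /* Walk every shader engine / shader array pair: apply the CU disable masks
      * parsed by amdgpu_gfx_parse_disable_cu(), record the resulting active-CU
      * bitmaps, count the active CUs and build the always-on (AO) CU mask from
      * the first two active CUs of each SH.
      */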
6706 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6707 {
6708         int i, j, k, counter, active_cu_number = 0;
6709         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6710         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6711         unsigned disable_masks[4 * 2];
6712
6713         memset(cu_info, 0, sizeof(*cu_info));
6714
6715         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6716
6717         mutex_lock(&adev->grbm_idx_mutex);
6718         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6719                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6720                         mask = 1;
6721                         ao_bitmap = 0;
6722                         counter = 0;
6723                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6724                         if (i < 4 && j < 2)
6725                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6726                                         adev, disable_masks[i * 2 + j]);
6727                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6728                         cu_info->bitmap[i][j] = bitmap;
6729
6730                         for (k = 0; k < 16; k++) {
6731                                 if (bitmap & mask) {
6732                                         if (counter < 2)
6733                                                 ao_bitmap |= mask;
6734                                         counter++;
6735                                 }
6736                                 mask <<= 1;
6737                         }
6738                         active_cu_number += counter;
6739                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6740                 }
6741         }
6742         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6743         mutex_unlock(&adev->grbm_idx_mutex);
6744
6745         cu_info->number = active_cu_number;
6746         cu_info->ao_cu_mask = ao_cu_mask;
6747 }