ARM: dts: socfpga: fix definitions of serial console
[cascardo/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "clearstate_vi.h"
32
33 #include "gmc/gmc_8_2_d.h"
34 #include "gmc/gmc_8_2_sh_mask.h"
35
36 #include "oss/oss_3_0_d.h"
37 #include "oss/oss_3_0_sh_mask.h"
38
39 #include "bif/bif_5_0_d.h"
40 #include "bif/bif_5_0_sh_mask.h"
41
42 #include "gca/gfx_8_0_d.h"
43 #include "gca/gfx_8_0_enum.h"
44 #include "gca/gfx_8_0_sh_mask.h"
45 #include "gca/gfx_8_0_enum.h"
46
47 #include "dce/dce_10_0_d.h"
48 #include "dce/dce_10_0_sh_mask.h"
49
/* Ring counts used by this GFX8 block: 1 graphics ring and 8 compute rings. */
50 #define GFX8_NUM_GFX_RINGS     1
51 #define GFX8_NUM_COMPUTE_RINGS 8
52
/*
 * Per-ASIC "golden" GB_ADDR_CONFIG values. Topaz and Carrizo share one value.
 * NOTE(review): each value equals the mmGB_ADDR_CONFIG entry of the matching
 * *_golden_common_all table in this file (iceland/cz/polaris11/tonga) --
 * keep the two in sync when either changes.
 */
53 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
55 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
56 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
57
/*
 * Field-packing helpers. Each macro shifts its argument left by the
 * corresponding GB_TILE_MODE0 / GB_MACROTILE_MODE0 *__SHIFT constant.
 * No mask is applied, so bits of (x) wider than the field are not clipped.
 */
58 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
59 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
60 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
61 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
62 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
63 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
64 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
65 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
66 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
67
/*
 * Single-bit masks (bit 0 through bit 5) named after RLC_CGTT_MGCG_OVERRIDE
 * fields: CPF, RLC, MGCG, CGCG, CGLS and GRBM.
 */
68 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
69 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
70 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
71 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
72 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
73 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
74
75 /* BPM SERDES command codes: 1 = set, 0 = clear ("CLE" presumably abbreviates "clear"). */
76 #define SET_BPM_SERDES_CMD    1
77 #define CLE_BPM_SERDES_CMD    0
78
79 /* BPM register addresses, numbered consecutively from 0. BPM_REG_FGCG_MAX (5) is one past the last address. */
80 enum {
81         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
82         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
83         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
84         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
85         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
86         BPM_REG_FGCG_MAX
87 };
88
/*
 * Length constant (14) for the RLC "format direct register list".
 * NOTE(review): the unit (entries vs. dwords) and the use site are not
 * visible in this part of the file -- confirm before changing.
 */
89 #define RLC_FormatDirectRegListLength        14
90
/*
 * Firmware images declared for this module, grouped per ASIC in the order
 * carrizo, stoney, tonga, topaz, fiji, polaris11, polaris10.
 * Every group lists ce, pfp, me, mec and rlc images; all groups except
 * stoney and topaz additionally list a mec2 image.
 */
91 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
94 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
97
98 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
99 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
100 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
101 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
107 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
110
111 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
112 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
113 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
114 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
120 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
123
124 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
125 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
126 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
127 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
130
131 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
137
/*
 * GDS register table with one row per VMID, in ascending order VMID0..VMID15.
 * Each row names that VMID's BASE, SIZE, GWS and OA register, in that order.
 * NOTE(review): the field names of struct amdgpu_gds_reg_offset are declared
 * elsewhere -- confirm the positional initializer order matches them.
 */
138 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
139 {
140         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
141         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
142         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
143         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
144         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
145         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
146         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
147         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
148         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
149         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
150         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
151         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
152         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
153         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
154         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
155         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
156 };
157
/*
 * Tonga "a11" golden register settings: a flat u32 list, three words per row.
 * NOTE(review): rows are presumably {register, mask, value} as consumed by
 * amdgpu_program_register_sequence(); the Tonga call site is not visible in
 * this part of the file -- confirm.
 */
158 static const u32 golden_settings_tonga_a11[] =
159 {
160         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
161         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
162         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
163         mmGB_GPU_ID, 0x0000000f, 0x00000000,
164         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
165         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
166         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
167         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
168         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
169         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
170         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
171         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
172         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
173         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
174         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
175 };
176
/*
 * Tonga common golden settings: three u32 words per row, every row using the
 * full 0xffffffff second word. The mmGB_ADDR_CONFIG value (0x22011003)
 * equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 * NOTE(review): presumably {register, mask, value} rows -- confirm at the
 * consumer, which is not visible here.
 */
177 static const u32 tonga_golden_common_all[] =
178 {
179         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
180         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
181         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
182         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
183         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
184         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
185         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
186         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
187 };
188
/*
 * Tonga MGCG/CGCG init table: three u32 words per row.
 * Layout: RLC_CGTT_MGCG_OVERRIDE first, then the CGTT/CGTT-style clock
 * control registers, then per-CU CGTS registers for CU0..CU7, then
 * CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and
 * CP_MEM_SLP_CNTL. mmGRBM_GFX_INDEX appears twice with the same value
 * (before the clock-control rows and again before the per-CU rows).
 * NOTE(review): row order is likely significant to the consumer -- do not
 * sort or de-duplicate without confirming.
 */
189 static const u32 tonga_mgcg_cgcg_init[] =
190 {
191         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
192         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
193         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
194         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
195         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
198         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
200         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
201         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
202         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
203         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
205         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
209         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
210         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
213         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
214         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
215         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
216         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
217         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
218         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
219         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
220         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
222         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
225         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
230         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
235         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
240         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
245         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
248         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
249         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
250         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
251         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
252         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
253         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
254         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
255         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
256         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
257         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
258         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
259         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
260         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
261         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
262         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
263         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
264         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
265         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
266 };
267
/*
 * Polaris11 "a11" golden register settings: three u32 words per row.
 * NOTE(review): presumably {register, mask, value}; the Polaris11 call site
 * is not visible in this part of the file -- confirm.
 */
268 static const u32 golden_settings_polaris11_a11[] =
269 {
270         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
271         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
272         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
273         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
274         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
275         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
276         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
277         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
278         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
279         mmSQ_CONFIG, 0x07f80000, 0x07180000,
280         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
281         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
282         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
283         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
284         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
285 };
286
/*
 * Polaris11 common golden settings: three u32 words per row. Unlike the
 * Tonga/Polaris10/Fiji common tables in this file, it has no
 * PA_SC_RASTER_CONFIG rows. The mmGB_ADDR_CONFIG value (0x22011002) equals
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 */
287 static const u32 polaris11_golden_common_all[] =
288 {
289         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
290         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
291         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
292         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
293         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
294         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
295 };
296
/*
 * Polaris10 "a11" golden register settings: three u32 words per row.
 * NOTE(review): this table has an mmTCP_CHAN_STEER_HI row but, unlike
 * golden_settings_polaris11_a11, no mmTCP_CHAN_STEER_LO row -- confirm that
 * the omission is intentional.
 */
297 static const u32 golden_settings_polaris10_a11[] =
298 {
299         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
300         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
301         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
302         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
303         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
304         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
305         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
306         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
307         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
308         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
309         mmSQ_CONFIG, 0x07f80000, 0x07180000,
310         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
311         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
312         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
313         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
314 };
315
/*
 * Polaris10 common golden settings: three u32 words per row. The rows carry
 * the same registers and values as tonga_golden_common_all.
 * NOTE(review): presumably {register, mask, value} -- confirm at the consumer.
 */
316 static const u32 polaris10_golden_common_all[] =
317 {
318         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
320         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
321         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
322         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
323         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
324         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
325         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
326 };
327
/*
 * Fiji common golden settings: three u32 words per row. Compared with the
 * other *_golden_common_all tables in this file it has two extra trailing
 * rows: a second mmGRBM_GFX_INDEX row (same value as the first) followed by
 * mmSPI_CONFIG_CNTL_1.
 * NOTE(review): row order is likely significant -- confirm before reordering.
 */
328 static const u32 fiji_golden_common_all[] =
329 {
330         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
331         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
332         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
333         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
334         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
335         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
336         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
337         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
338         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
339         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
340 };
341
/*
 * Fiji "a10" golden register settings: three u32 words per row.
 * gfx_v8_0_init_golden_registers() passes this table, with its ARRAY_SIZE,
 * to amdgpu_program_register_sequence() for CHIP_FIJI, after
 * fiji_mgcg_cgcg_init.
 */
342 static const u32 golden_settings_fiji_a10[] =
343 {
344         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
345         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
346         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
347         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
348         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
349         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
350         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
351         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
352         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
353         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
354         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
355 };
356
/*
 * Fiji MGCG/CGCG init table: three u32 words per row.
 * It is the first table gfx_v8_0_init_golden_registers() passes to
 * amdgpu_program_register_sequence() for CHIP_FIJI. Unlike the
 * Tonga/Iceland/Carrizo variants in this file, it contains no per-CU
 * CGTS_CUn_* rows; the second mmGRBM_GFX_INDEX row is followed directly by
 * mmCGTS_SM_CTRL_REG.
 */
357 static const u32 fiji_mgcg_cgcg_init[] =
358 {
359         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
360         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
361         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
366         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
368         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
370         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
377         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
378         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
379         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
380         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
381         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
384         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
385         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
386         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
387         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
388         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
391         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
392         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
393         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
394 };
395
/*
 * Iceland "a11" golden register settings: three u32 words per row.
 * gfx_v8_0_init_golden_registers() passes this table, with its ARRAY_SIZE,
 * to amdgpu_program_register_sequence() for CHIP_TOPAZ, after
 * iceland_mgcg_cgcg_init and before iceland_golden_common_all.
 */
396 static const u32 golden_settings_iceland_a11[] =
397 {
398         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
399         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
400         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
401         mmGB_GPU_ID, 0x0000000f, 0x00000000,
402         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
403         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
404         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
405         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
406         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
407         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
408         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
409         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
410         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
411         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
412         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
413 };
414
/*
 * Iceland common golden settings: three u32 words per row.
 * It is the last of the three tables gfx_v8_0_init_golden_registers()
 * programs for CHIP_TOPAZ. The mmGB_ADDR_CONFIG value (0x22010001) equals
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
415 static const u32 iceland_golden_common_all[] =
416 {
417         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
418         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
419         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
420         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
421         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
422         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
423         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
424         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
425 };
426
/*
 * Iceland MGCG/CGCG init table: three u32 words per row.
 * It is the first table gfx_v8_0_init_golden_registers() passes to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ. Per-CU CGTS rows cover
 * CU0..CU5 only, and there is no trailing mmCP_MEM_SLP_CNTL row (the
 * Tonga/Fiji/Carrizo variants in this file have one). mmGRBM_GFX_INDEX
 * appears twice with the same value.
 * NOTE(review): row order is likely significant -- confirm before reordering.
 */
427 static const u32 iceland_mgcg_cgcg_init[] =
428 {
429         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
430         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
431         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
432         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
434         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
435         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
436         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
437         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
438         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
440         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
451         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
452         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
455         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
456         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
459         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
460         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
461         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
462         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
463         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
464         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
465         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
466         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
467         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
468         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
469         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
470         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
471         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
472         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
473         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
474         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
475         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
476         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
477         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
478         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
479         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
480         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
481         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
482         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
483         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
484         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
485         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
486         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
487         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
488         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
489         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
490         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
491         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
492         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
493 };
494
/*
 * "cz" (presumably Carrizo) a11 golden register settings: three u32 words
 * per row.
 * NOTE(review): the mmTCP_ADDR_CONFIG row has value 0x000000f3 with second
 * word 0x0000000f, i.e. value bits set outside that word. Whether those bits
 * reach the register depends on how the consumer applies the second word --
 * confirm against amdgpu_program_register_sequence().
 */
495 static const u32 cz_golden_settings_a11[] =
496 {
497         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
498         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
499         mmGB_GPU_ID, 0x0000000f, 0x00000000,
500         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
501         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
502         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
503         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
504         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
505         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
506         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
507 };
508
/*
 * "cz" common golden settings: three u32 words per row. The rows carry the
 * same registers and values as iceland_golden_common_all; the
 * mmGB_ADDR_CONFIG value (0x22010001) equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
509 static const u32 cz_golden_common_all[] =
510 {
511         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
512         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
513         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
514         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
515         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
516         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
517         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
518         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
519 };
520
/*
 * "cz" MGCG/CGCG init table: three u32 words per row.
 * Same row layout as tonga_mgcg_cgcg_init (per-CU CGTS rows for CU0..CU7),
 * but mmCGTT_CPF_CLK_CTRL is 0x00000100 and mmRLC_CGCG_CGLS_CTRL is
 * 0x0020003f here. mmGRBM_GFX_INDEX appears twice with the same value.
 * NOTE(review): row order is likely significant -- confirm before reordering.
 */
521 static const u32 cz_mgcg_cgcg_init[] =
522 {
523         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
524         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
525         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
526         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
527         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
528         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
529         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
530         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
531         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
532         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
534         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
535         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
541         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
542         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
543         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
544         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
545         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
548         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
549         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
550         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
551         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
552         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
553         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
554         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
555         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
556         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
557         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
558         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
559         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
560         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
561         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
562         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
563         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
564         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
567         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
572         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
577         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
582         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
587         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
592         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
595         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
596         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
597         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
598 };
599
/*
 * Stoney a11 golden register settings: three u32 words per row.
 * NOTE(review): as in cz_golden_settings_a11, the mmTCP_ADDR_CONFIG value
 * (0x000000f1) has bits set outside its second word (0x0000000f) -- confirm
 * how the consumer treats them.
 */
600 static const u32 stoney_golden_settings_a11[] =
601 {
602         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
603         mmGB_GPU_ID, 0x0000000f, 0x00000000,
604         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
605         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
606         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
607         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
608         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
609         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
610         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
611         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
612 };
613
/*
 * Stoney common golden settings: three u32 words per row. Its
 * mmGB_ADDR_CONFIG value (0x12010001) and mmPA_SC_RASTER_CONFIG value
 * (0x00000000) differ from every other *_golden_common_all table in this
 * file.
 */
614 static const u32 stoney_golden_common_all[] =
615 {
616         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
617         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
618         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
619         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
620         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
621         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
622         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
623         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
624 };
625
/*
 * Stoney MGCG/CGCG init table: three u32 words per row. Much shorter than
 * the other *_mgcg_cgcg_init tables in this file: it has no
 * RLC_CGTT_MGCG_OVERRIDE row, no CGTT clock-control rows and no per-CU CGTS
 * rows, and it additionally sets mmRLC_MEM_SLP_CNTL and mmATC_MISC_CG.
 */
626 static const u32 stoney_mgcg_cgcg_init[] =
627 {
628         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
629         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
630         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
631         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
632         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
633         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
634 };
635
636 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
637 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
638 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
639 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
640 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
641 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
642
643 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
644 {
645         switch (adev->asic_type) {
646         case CHIP_TOPAZ:
647                 amdgpu_program_register_sequence(adev,
648                                                  iceland_mgcg_cgcg_init,
649                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
650                 amdgpu_program_register_sequence(adev,
651                                                  golden_settings_iceland_a11,
652                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
653                 amdgpu_program_register_sequence(adev,
654                                                  iceland_golden_common_all,
655                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
656                 break;
657         case CHIP_FIJI:
658                 amdgpu_program_register_sequence(adev,
659                                                  fiji_mgcg_cgcg_init,
660                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
661                 amdgpu_program_register_sequence(adev,
662                                                  golden_settings_fiji_a10,
663                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
664                 amdgpu_program_register_sequence(adev,
665                                                  fiji_golden_common_all,
666                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
667                 break;
668
669         case CHIP_TONGA:
670                 amdgpu_program_register_sequence(adev,
671                                                  tonga_mgcg_cgcg_init,
672                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
673                 amdgpu_program_register_sequence(adev,
674                                                  golden_settings_tonga_a11,
675                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
676                 amdgpu_program_register_sequence(adev,
677                                                  tonga_golden_common_all,
678                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
679                 break;
680         case CHIP_POLARIS11:
681                 amdgpu_program_register_sequence(adev,
682                                                  golden_settings_polaris11_a11,
683                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
684                 amdgpu_program_register_sequence(adev,
685                                                  polaris11_golden_common_all,
686                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
687                 break;
688         case CHIP_POLARIS10:
689                 amdgpu_program_register_sequence(adev,
690                                                  golden_settings_polaris10_a11,
691                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
692                 amdgpu_program_register_sequence(adev,
693                                                  polaris10_golden_common_all,
694                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
695                 break;
696         case CHIP_CARRIZO:
697                 amdgpu_program_register_sequence(adev,
698                                                  cz_mgcg_cgcg_init,
699                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
700                 amdgpu_program_register_sequence(adev,
701                                                  cz_golden_settings_a11,
702                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
703                 amdgpu_program_register_sequence(adev,
704                                                  cz_golden_common_all,
705                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
706                 break;
707         case CHIP_STONEY:
708                 amdgpu_program_register_sequence(adev,
709                                                  stoney_mgcg_cgcg_init,
710                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
711                 amdgpu_program_register_sequence(adev,
712                                                  stoney_golden_settings_a11,
713                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
714                 amdgpu_program_register_sequence(adev,
715                                                  stoney_golden_common_all,
716                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
717                 break;
718         default:
719                 break;
720         }
721 }
722
723 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
724 {
725         int i;
726
727         adev->gfx.scratch.num_reg = 7;
728         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
729         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
730                 adev->gfx.scratch.free[i] = true;
731                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
732         }
733 }
734
735 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
736 {
737         struct amdgpu_device *adev = ring->adev;
738         uint32_t scratch;
739         uint32_t tmp = 0;
740         unsigned i;
741         int r;
742
743         r = amdgpu_gfx_scratch_get(adev, &scratch);
744         if (r) {
745                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
746                 return r;
747         }
748         WREG32(scratch, 0xCAFEDEAD);
749         r = amdgpu_ring_alloc(ring, 3);
750         if (r) {
751                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
752                           ring->idx, r);
753                 amdgpu_gfx_scratch_free(adev, scratch);
754                 return r;
755         }
756         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
757         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
758         amdgpu_ring_write(ring, 0xDEADBEEF);
759         amdgpu_ring_commit(ring);
760
761         for (i = 0; i < adev->usec_timeout; i++) {
762                 tmp = RREG32(scratch);
763                 if (tmp == 0xDEADBEEF)
764                         break;
765                 DRM_UDELAY(1);
766         }
767         if (i < adev->usec_timeout) {
768                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
769                          ring->idx, i);
770         } else {
771                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
772                           ring->idx, scratch, tmp);
773                 r = -EINVAL;
774         }
775         amdgpu_gfx_scratch_free(adev, scratch);
776         return r;
777 }
778
779 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
780 {
781         struct amdgpu_device *adev = ring->adev;
782         struct amdgpu_ib ib;
783         struct fence *f = NULL;
784         uint32_t scratch;
785         uint32_t tmp = 0;
786         unsigned i;
787         int r;
788
789         r = amdgpu_gfx_scratch_get(adev, &scratch);
790         if (r) {
791                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
792                 return r;
793         }
794         WREG32(scratch, 0xCAFEDEAD);
795         memset(&ib, 0, sizeof(ib));
796         r = amdgpu_ib_get(adev, NULL, 256, &ib);
797         if (r) {
798                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
799                 goto err1;
800         }
801         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
802         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
803         ib.ptr[2] = 0xDEADBEEF;
804         ib.length_dw = 3;
805
806         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
807         if (r)
808                 goto err2;
809
810         r = fence_wait(f, false);
811         if (r) {
812                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
813                 goto err2;
814         }
815         for (i = 0; i < adev->usec_timeout; i++) {
816                 tmp = RREG32(scratch);
817                 if (tmp == 0xDEADBEEF)
818                         break;
819                 DRM_UDELAY(1);
820         }
821         if (i < adev->usec_timeout) {
822                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
823                          ring->idx, i);
824                 goto err2;
825         } else {
826                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
827                           scratch, tmp);
828                 r = -EINVAL;
829         }
830 err2:
831         fence_put(f);
832         amdgpu_ib_free(adev, &ib, NULL);
833         fence_put(f);
834 err1:
835         amdgpu_gfx_scratch_free(adev, scratch);
836         return r;
837 }
838
839 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
840 {
841         const char *chip_name;
842         char fw_name[30];
843         int err;
844         struct amdgpu_firmware_info *info = NULL;
845         const struct common_firmware_header *header = NULL;
846         const struct gfx_firmware_header_v1_0 *cp_hdr;
847         const struct rlc_firmware_header_v2_0 *rlc_hdr;
848         unsigned int *tmp = NULL, i;
849
850         DRM_DEBUG("\n");
851
852         switch (adev->asic_type) {
853         case CHIP_TOPAZ:
854                 chip_name = "topaz";
855                 break;
856         case CHIP_TONGA:
857                 chip_name = "tonga";
858                 break;
859         case CHIP_CARRIZO:
860                 chip_name = "carrizo";
861                 break;
862         case CHIP_FIJI:
863                 chip_name = "fiji";
864                 break;
865         case CHIP_POLARIS11:
866                 chip_name = "polaris11";
867                 break;
868         case CHIP_POLARIS10:
869                 chip_name = "polaris10";
870                 break;
871         case CHIP_STONEY:
872                 chip_name = "stoney";
873                 break;
874         default:
875                 BUG();
876         }
877
878         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
879         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
880         if (err)
881                 goto out;
882         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
883         if (err)
884                 goto out;
885         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
886         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
887         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
888
889         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
890         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
891         if (err)
892                 goto out;
893         err = amdgpu_ucode_validate(adev->gfx.me_fw);
894         if (err)
895                 goto out;
896         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
897         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
898         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
899
900         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
901         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
902         if (err)
903                 goto out;
904         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
905         if (err)
906                 goto out;
907         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
908         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
909         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
910
911         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
912         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
913         if (err)
914                 goto out;
915         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
916         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
917         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
918         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
919
920         adev->gfx.rlc.save_and_restore_offset =
921                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
922         adev->gfx.rlc.clear_state_descriptor_offset =
923                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
924         adev->gfx.rlc.avail_scratch_ram_locations =
925                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
926         adev->gfx.rlc.reg_restore_list_size =
927                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
928         adev->gfx.rlc.reg_list_format_start =
929                         le32_to_cpu(rlc_hdr->reg_list_format_start);
930         adev->gfx.rlc.reg_list_format_separate_start =
931                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
932         adev->gfx.rlc.starting_offsets_start =
933                         le32_to_cpu(rlc_hdr->starting_offsets_start);
934         adev->gfx.rlc.reg_list_format_size_bytes =
935                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
936         adev->gfx.rlc.reg_list_size_bytes =
937                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
938
939         adev->gfx.rlc.register_list_format =
940                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
941                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
942
943         if (!adev->gfx.rlc.register_list_format) {
944                 err = -ENOMEM;
945                 goto out;
946         }
947
948         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
949                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
950         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
951                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
952
953         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
954
955         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
956                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
957         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
958                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
959
960         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
961         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
962         if (err)
963                 goto out;
964         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
965         if (err)
966                 goto out;
967         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
968         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
969         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
970
971         if ((adev->asic_type != CHIP_STONEY) &&
972             (adev->asic_type != CHIP_TOPAZ)) {
973                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
974                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
975                 if (!err) {
976                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
977                         if (err)
978                                 goto out;
979                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
980                                 adev->gfx.mec2_fw->data;
981                         adev->gfx.mec2_fw_version =
982                                 le32_to_cpu(cp_hdr->header.ucode_version);
983                         adev->gfx.mec2_feature_version =
984                                 le32_to_cpu(cp_hdr->ucode_feature_version);
985                 } else {
986                         err = 0;
987                         adev->gfx.mec2_fw = NULL;
988                 }
989         }
990
991         if (adev->firmware.smu_load) {
992                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
993                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
994                 info->fw = adev->gfx.pfp_fw;
995                 header = (const struct common_firmware_header *)info->fw->data;
996                 adev->firmware.fw_size +=
997                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
998
999                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1000                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1001                 info->fw = adev->gfx.me_fw;
1002                 header = (const struct common_firmware_header *)info->fw->data;
1003                 adev->firmware.fw_size +=
1004                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1005
1006                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1007                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1008                 info->fw = adev->gfx.ce_fw;
1009                 header = (const struct common_firmware_header *)info->fw->data;
1010                 adev->firmware.fw_size +=
1011                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1012
1013                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1014                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1015                 info->fw = adev->gfx.rlc_fw;
1016                 header = (const struct common_firmware_header *)info->fw->data;
1017                 adev->firmware.fw_size +=
1018                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1019
1020                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1021                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1022                 info->fw = adev->gfx.mec_fw;
1023                 header = (const struct common_firmware_header *)info->fw->data;
1024                 adev->firmware.fw_size +=
1025                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1026
1027                 if (adev->gfx.mec2_fw) {
1028                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1029                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1030                         info->fw = adev->gfx.mec2_fw;
1031                         header = (const struct common_firmware_header *)info->fw->data;
1032                         adev->firmware.fw_size +=
1033                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1034                 }
1035
1036         }
1037
1038 out:
1039         if (err) {
1040                 dev_err(adev->dev,
1041                         "gfx8: Failed to load firmware \"%s\"\n",
1042                         fw_name);
1043                 release_firmware(adev->gfx.pfp_fw);
1044                 adev->gfx.pfp_fw = NULL;
1045                 release_firmware(adev->gfx.me_fw);
1046                 adev->gfx.me_fw = NULL;
1047                 release_firmware(adev->gfx.ce_fw);
1048                 adev->gfx.ce_fw = NULL;
1049                 release_firmware(adev->gfx.rlc_fw);
1050                 adev->gfx.rlc_fw = NULL;
1051                 release_firmware(adev->gfx.mec_fw);
1052                 adev->gfx.mec_fw = NULL;
1053                 release_firmware(adev->gfx.mec2_fw);
1054                 adev->gfx.mec2_fw = NULL;
1055         }
1056         return err;
1057 }
1058
1059 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1060                                     volatile u32 *buffer)
1061 {
1062         u32 count = 0, i;
1063         const struct cs_section_def *sect = NULL;
1064         const struct cs_extent_def *ext = NULL;
1065
1066         if (adev->gfx.rlc.cs_data == NULL)
1067                 return;
1068         if (buffer == NULL)
1069                 return;
1070
1071         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1072         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1073
1074         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1075         buffer[count++] = cpu_to_le32(0x80000000);
1076         buffer[count++] = cpu_to_le32(0x80000000);
1077
1078         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1079                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1080                         if (sect->id == SECT_CONTEXT) {
1081                                 buffer[count++] =
1082                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1083                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1084                                                 PACKET3_SET_CONTEXT_REG_START);
1085                                 for (i = 0; i < ext->reg_count; i++)
1086                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1087                         } else {
1088                                 return;
1089                         }
1090                 }
1091         }
1092
1093         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1094         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1095                         PACKET3_SET_CONTEXT_REG_START);
1096         switch (adev->asic_type) {
1097         case CHIP_TONGA:
1098         case CHIP_POLARIS10:
1099                 buffer[count++] = cpu_to_le32(0x16000012);
1100                 buffer[count++] = cpu_to_le32(0x0000002A);
1101                 break;
1102         case CHIP_POLARIS11:
1103                 buffer[count++] = cpu_to_le32(0x16000012);
1104                 buffer[count++] = cpu_to_le32(0x00000000);
1105                 break;
1106         case CHIP_FIJI:
1107                 buffer[count++] = cpu_to_le32(0x3a00161a);
1108                 buffer[count++] = cpu_to_le32(0x0000002e);
1109                 break;
1110         case CHIP_TOPAZ:
1111         case CHIP_CARRIZO:
1112                 buffer[count++] = cpu_to_le32(0x00000002);
1113                 buffer[count++] = cpu_to_le32(0x00000000);
1114                 break;
1115         case CHIP_STONEY:
1116                 buffer[count++] = cpu_to_le32(0x00000000);
1117                 buffer[count++] = cpu_to_le32(0x00000000);
1118                 break;
1119         default:
1120                 buffer[count++] = cpu_to_le32(0x00000000);
1121                 buffer[count++] = cpu_to_le32(0x00000000);
1122                 break;
1123         }
1124
1125         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1126         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1127
1128         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1129         buffer[count++] = cpu_to_le32(0);
1130 }
1131
1132 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1133 {
1134         int r;
1135
1136         /* clear state block */
1137         if (adev->gfx.rlc.clear_state_obj) {
1138                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1139                 if (unlikely(r != 0))
1140                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1141                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1142                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1143
1144                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1145                 adev->gfx.rlc.clear_state_obj = NULL;
1146         }
1147 }
1148
1149 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1150 {
1151         volatile u32 *dst_ptr;
1152         u32 dws;
1153         const struct cs_section_def *cs_data;
1154         int r;
1155
1156         adev->gfx.rlc.cs_data = vi_cs_data;
1157
1158         cs_data = adev->gfx.rlc.cs_data;
1159
1160         if (cs_data) {
1161                 /* clear state block */
1162                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1163
1164                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1165                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1166                                              AMDGPU_GEM_DOMAIN_VRAM,
1167                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1168                                              NULL, NULL,
1169                                              &adev->gfx.rlc.clear_state_obj);
1170                         if (r) {
1171                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1172                                 gfx_v8_0_rlc_fini(adev);
1173                                 return r;
1174                         }
1175                 }
1176                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1177                 if (unlikely(r != 0)) {
1178                         gfx_v8_0_rlc_fini(adev);
1179                         return r;
1180                 }
1181                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1182                                   &adev->gfx.rlc.clear_state_gpu_addr);
1183                 if (r) {
1184                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1185                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1186                         gfx_v8_0_rlc_fini(adev);
1187                         return r;
1188                 }
1189
1190                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1191                 if (r) {
1192                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1193                         gfx_v8_0_rlc_fini(adev);
1194                         return r;
1195                 }
1196                 /* set up the cs buffer */
1197                 dst_ptr = adev->gfx.rlc.cs_ptr;
1198                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1199                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1200                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1201         }
1202
1203         return 0;
1204 }
1205
1206 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1207 {
1208         int r;
1209
1210         if (adev->gfx.mec.hpd_eop_obj) {
1211                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1212                 if (unlikely(r != 0))
1213                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1214                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1215                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1216
1217                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1218                 adev->gfx.mec.hpd_eop_obj = NULL;
1219         }
1220 }
1221
1222 #define MEC_HPD_SIZE 2048
1223
1224 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1225 {
1226         int r;
1227         u32 *hpd;
1228
1229         /*
1230          * we assign only 1 pipe because all other pipes will
1231          * be handled by KFD
1232          */
1233         adev->gfx.mec.num_mec = 1;
1234         adev->gfx.mec.num_pipe = 1;
1235         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1236
1237         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1238                 r = amdgpu_bo_create(adev,
1239                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1240                                      PAGE_SIZE, true,
1241                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1242                                      &adev->gfx.mec.hpd_eop_obj);
1243                 if (r) {
1244                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1245                         return r;
1246                 }
1247         }
1248
1249         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1250         if (unlikely(r != 0)) {
1251                 gfx_v8_0_mec_fini(adev);
1252                 return r;
1253         }
1254         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1255                           &adev->gfx.mec.hpd_eop_gpu_addr);
1256         if (r) {
1257                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1258                 gfx_v8_0_mec_fini(adev);
1259                 return r;
1260         }
1261         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1262         if (r) {
1263                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1264                 gfx_v8_0_mec_fini(adev);
1265                 return r;
1266         }
1267
1268         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1269
1270         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1271         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1272
1273         return 0;
1274 }
1275
1276 static const u32 vgpr_init_compute_shader[] =
1277 {
1278         0x7e000209, 0x7e020208,
1279         0x7e040207, 0x7e060206,
1280         0x7e080205, 0x7e0a0204,
1281         0x7e0c0203, 0x7e0e0202,
1282         0x7e100201, 0x7e120200,
1283         0x7e140209, 0x7e160208,
1284         0x7e180207, 0x7e1a0206,
1285         0x7e1c0205, 0x7e1e0204,
1286         0x7e200203, 0x7e220202,
1287         0x7e240201, 0x7e260200,
1288         0x7e280209, 0x7e2a0208,
1289         0x7e2c0207, 0x7e2e0206,
1290         0x7e300205, 0x7e320204,
1291         0x7e340203, 0x7e360202,
1292         0x7e380201, 0x7e3a0200,
1293         0x7e3c0209, 0x7e3e0208,
1294         0x7e400207, 0x7e420206,
1295         0x7e440205, 0x7e460204,
1296         0x7e480203, 0x7e4a0202,
1297         0x7e4c0201, 0x7e4e0200,
1298         0x7e500209, 0x7e520208,
1299         0x7e540207, 0x7e560206,
1300         0x7e580205, 0x7e5a0204,
1301         0x7e5c0203, 0x7e5e0202,
1302         0x7e600201, 0x7e620200,
1303         0x7e640209, 0x7e660208,
1304         0x7e680207, 0x7e6a0206,
1305         0x7e6c0205, 0x7e6e0204,
1306         0x7e700203, 0x7e720202,
1307         0x7e740201, 0x7e760200,
1308         0x7e780209, 0x7e7a0208,
1309         0x7e7c0207, 0x7e7e0206,
1310         0xbf8a0000, 0xbf810000,
1311 };
1312
/*
 * Raw shader binary copied into the indirect buffer by
 * gfx_v8_0_do_edc_gpr_workarounds(); both SGPR dispatches (SGPR1 and SGPR2)
 * point their program address at this same image.
 *
 * NOTE(review): the leading 32 words look like scalar register moves and
 * the tail ends with the same two closing words as the VGPR shader plus one
 * zero word of padding - confirm against the ISA doc.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102, 0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100, 0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108, 0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106, 0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104, 0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102, 0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100, 0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108, 0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005, 0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009, 0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1337
1338 static const u32 vgpr_init_regs[] =
1339 {
1340         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1341         mmCOMPUTE_RESOURCE_LIMITS, 0,
1342         mmCOMPUTE_NUM_THREAD_X, 256*4,
1343         mmCOMPUTE_NUM_THREAD_Y, 1,
1344         mmCOMPUTE_NUM_THREAD_Z, 1,
1345         mmCOMPUTE_PGM_RSRC2, 20,
1346         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1347         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1348         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1349         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1350         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1351         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1352         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1353         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1354         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1355         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1356 };
1357
1358 static const u32 sgpr1_init_regs[] =
1359 {
1360         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1361         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1362         mmCOMPUTE_NUM_THREAD_X, 256*5,
1363         mmCOMPUTE_NUM_THREAD_Y, 1,
1364         mmCOMPUTE_NUM_THREAD_Z, 1,
1365         mmCOMPUTE_PGM_RSRC2, 20,
1366         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1367         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1368         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1369         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1370         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1371         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1372         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1373         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1374         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1375         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1376 };
1377
1378 static const u32 sgpr2_init_regs[] =
1379 {
1380         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1381         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1382         mmCOMPUTE_NUM_THREAD_X, 256*5,
1383         mmCOMPUTE_NUM_THREAD_Y, 1,
1384         mmCOMPUTE_NUM_THREAD_Z, 1,
1385         mmCOMPUTE_PGM_RSRC2, 20,
1386         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1387         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1388         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1389         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1390         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1391         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1392         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1393         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1394         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1395         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1396 };
1397
1398 static const u32 sec_ded_counter_registers[] =
1399 {
1400         mmCPC_EDC_ATC_CNT,
1401         mmCPC_EDC_SCRATCH_CNT,
1402         mmCPC_EDC_UCODE_CNT,
1403         mmCPF_EDC_ATC_CNT,
1404         mmCPF_EDC_ROQ_CNT,
1405         mmCPF_EDC_TAG_CNT,
1406         mmCPG_EDC_ATC_CNT,
1407         mmCPG_EDC_DMA_CNT,
1408         mmCPG_EDC_TAG_CNT,
1409         mmDC_EDC_CSINVOC_CNT,
1410         mmDC_EDC_RESTORE_CNT,
1411         mmDC_EDC_STATE_CNT,
1412         mmGDS_EDC_CNT,
1413         mmGDS_EDC_GRBM_CNT,
1414         mmGDS_EDC_OA_DED,
1415         mmSPI_EDC_CNT,
1416         mmSQC_ATC_EDC_GATCL1_CNT,
1417         mmSQC_EDC_CNT,
1418         mmSQ_EDC_DED_CNT,
1419         mmSQ_EDC_INFO,
1420         mmSQ_EDC_SEC_CNT,
1421         mmTCC_EDC_CNT,
1422         mmTCP_ATC_EDC_GATCL1_CNT,
1423         mmTCP_EDC_CNT,
1424         mmTD_EDC_CNT
1425 };
1426
1427 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1428 {
1429         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1430         struct amdgpu_ib ib;
1431         struct fence *f = NULL;
1432         int r, i;
1433         u32 tmp;
1434         unsigned total_size, vgpr_offset, sgpr_offset;
1435         u64 gpu_addr;
1436
1437         /* only supported on CZ */
1438         if (adev->asic_type != CHIP_CARRIZO)
1439                 return 0;
1440
1441         /* bail if the compute ring is not ready */
1442         if (!ring->ready)
1443                 return 0;
1444
1445         tmp = RREG32(mmGB_EDC_MODE);
1446         WREG32(mmGB_EDC_MODE, 0);
1447
1448         total_size =
1449                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1450         total_size +=
1451                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1452         total_size +=
1453                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1454         total_size = ALIGN(total_size, 256);
1455         vgpr_offset = total_size;
1456         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1457         sgpr_offset = total_size;
1458         total_size += sizeof(sgpr_init_compute_shader);
1459
1460         /* allocate an indirect buffer to put the commands in */
1461         memset(&ib, 0, sizeof(ib));
1462         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1463         if (r) {
1464                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1465                 return r;
1466         }
1467
1468         /* load the compute shaders */
1469         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1470                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1471
1472         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1473                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1474
1475         /* init the ib length to 0 */
1476         ib.length_dw = 0;
1477
1478         /* VGPR */
1479         /* write the register state for the compute dispatch */
1480         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1481                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1482                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1483                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1484         }
1485         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1486         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1487         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1488         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1489         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1490         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1491
1492         /* write dispatch packet */
1493         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1494         ib.ptr[ib.length_dw++] = 8; /* x */
1495         ib.ptr[ib.length_dw++] = 1; /* y */
1496         ib.ptr[ib.length_dw++] = 1; /* z */
1497         ib.ptr[ib.length_dw++] =
1498                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1499
1500         /* write CS partial flush packet */
1501         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1502         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1503
1504         /* SGPR1 */
1505         /* write the register state for the compute dispatch */
1506         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1507                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1508                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1509                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1510         }
1511         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1512         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1513         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1514         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1515         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1516         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1517
1518         /* write dispatch packet */
1519         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1520         ib.ptr[ib.length_dw++] = 8; /* x */
1521         ib.ptr[ib.length_dw++] = 1; /* y */
1522         ib.ptr[ib.length_dw++] = 1; /* z */
1523         ib.ptr[ib.length_dw++] =
1524                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1525
1526         /* write CS partial flush packet */
1527         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1528         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1529
1530         /* SGPR2 */
1531         /* write the register state for the compute dispatch */
1532         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1533                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1534                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1535                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1536         }
1537         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1538         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1539         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1540         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1541         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1542         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1543
1544         /* write dispatch packet */
1545         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1546         ib.ptr[ib.length_dw++] = 8; /* x */
1547         ib.ptr[ib.length_dw++] = 1; /* y */
1548         ib.ptr[ib.length_dw++] = 1; /* z */
1549         ib.ptr[ib.length_dw++] =
1550                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1551
1552         /* write CS partial flush packet */
1553         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1554         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1555
1556         /* shedule the ib on the ring */
1557         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1558         if (r) {
1559                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1560                 goto fail;
1561         }
1562
1563         /* wait for the GPU to finish processing the IB */
1564         r = fence_wait(f, false);
1565         if (r) {
1566                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1567                 goto fail;
1568         }
1569
1570         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1571         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1572         WREG32(mmGB_EDC_MODE, tmp);
1573
1574         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1575         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1576         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1577
1578
1579         /* read back registers to clear the counters */
1580         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1581                 RREG32(sec_ded_counter_registers[i]);
1582
1583 fail:
1584         fence_put(f);
1585         amdgpu_ib_free(adev, &ib, NULL);
1586         fence_put(f);
1587
1588         return r;
1589 }
1590
1591 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1592 {
1593         u32 gb_addr_config;
1594         u32 mc_shared_chmap, mc_arb_ramcfg;
1595         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1596         u32 tmp;
1597         int ret;
1598
1599         switch (adev->asic_type) {
1600         case CHIP_TOPAZ:
1601                 adev->gfx.config.max_shader_engines = 1;
1602                 adev->gfx.config.max_tile_pipes = 2;
1603                 adev->gfx.config.max_cu_per_sh = 6;
1604                 adev->gfx.config.max_sh_per_se = 1;
1605                 adev->gfx.config.max_backends_per_se = 2;
1606                 adev->gfx.config.max_texture_channel_caches = 2;
1607                 adev->gfx.config.max_gprs = 256;
1608                 adev->gfx.config.max_gs_threads = 32;
1609                 adev->gfx.config.max_hw_contexts = 8;
1610
1611                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1612                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1613                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1614                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1615                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1616                 break;
1617         case CHIP_FIJI:
1618                 adev->gfx.config.max_shader_engines = 4;
1619                 adev->gfx.config.max_tile_pipes = 16;
1620                 adev->gfx.config.max_cu_per_sh = 16;
1621                 adev->gfx.config.max_sh_per_se = 1;
1622                 adev->gfx.config.max_backends_per_se = 4;
1623                 adev->gfx.config.max_texture_channel_caches = 16;
1624                 adev->gfx.config.max_gprs = 256;
1625                 adev->gfx.config.max_gs_threads = 32;
1626                 adev->gfx.config.max_hw_contexts = 8;
1627
1628                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1629                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1630                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1631                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1632                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1633                 break;
1634         case CHIP_POLARIS11:
1635                 ret = amdgpu_atombios_get_gfx_info(adev);
1636                 if (ret)
1637                         return ret;
1638                 adev->gfx.config.max_gprs = 256;
1639                 adev->gfx.config.max_gs_threads = 32;
1640                 adev->gfx.config.max_hw_contexts = 8;
1641
1642                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1643                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1644                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1645                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1646                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1647                 break;
1648         case CHIP_POLARIS10:
1649                 ret = amdgpu_atombios_get_gfx_info(adev);
1650                 if (ret)
1651                         return ret;
1652                 adev->gfx.config.max_gprs = 256;
1653                 adev->gfx.config.max_gs_threads = 32;
1654                 adev->gfx.config.max_hw_contexts = 8;
1655
1656                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1657                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1658                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1659                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1660                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1661                 break;
1662         case CHIP_TONGA:
1663                 adev->gfx.config.max_shader_engines = 4;
1664                 adev->gfx.config.max_tile_pipes = 8;
1665                 adev->gfx.config.max_cu_per_sh = 8;
1666                 adev->gfx.config.max_sh_per_se = 1;
1667                 adev->gfx.config.max_backends_per_se = 2;
1668                 adev->gfx.config.max_texture_channel_caches = 8;
1669                 adev->gfx.config.max_gprs = 256;
1670                 adev->gfx.config.max_gs_threads = 32;
1671                 adev->gfx.config.max_hw_contexts = 8;
1672
1673                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1678                 break;
1679         case CHIP_CARRIZO:
1680                 adev->gfx.config.max_shader_engines = 1;
1681                 adev->gfx.config.max_tile_pipes = 2;
1682                 adev->gfx.config.max_sh_per_se = 1;
1683                 adev->gfx.config.max_backends_per_se = 2;
1684
1685                 switch (adev->pdev->revision) {
1686                 case 0xc4:
1687                 case 0x84:
1688                 case 0xc8:
1689                 case 0xcc:
1690                 case 0xe1:
1691                 case 0xe3:
1692                         /* B10 */
1693                         adev->gfx.config.max_cu_per_sh = 8;
1694                         break;
1695                 case 0xc5:
1696                 case 0x81:
1697                 case 0x85:
1698                 case 0xc9:
1699                 case 0xcd:
1700                 case 0xe2:
1701                 case 0xe4:
1702                         /* B8 */
1703                         adev->gfx.config.max_cu_per_sh = 6;
1704                         break;
1705                 case 0xc6:
1706                 case 0xca:
1707                 case 0xce:
1708                 case 0x88:
1709                         /* B6 */
1710                         adev->gfx.config.max_cu_per_sh = 6;
1711                         break;
1712                 case 0xc7:
1713                 case 0x87:
1714                 case 0xcb:
1715                 case 0xe5:
1716                 case 0x89:
1717                 default:
1718                         /* B4 */
1719                         adev->gfx.config.max_cu_per_sh = 4;
1720                         break;
1721                 }
1722
1723                 adev->gfx.config.max_texture_channel_caches = 2;
1724                 adev->gfx.config.max_gprs = 256;
1725                 adev->gfx.config.max_gs_threads = 32;
1726                 adev->gfx.config.max_hw_contexts = 8;
1727
1728                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1729                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1730                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1731                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1732                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1733                 break;
1734         case CHIP_STONEY:
1735                 adev->gfx.config.max_shader_engines = 1;
1736                 adev->gfx.config.max_tile_pipes = 2;
1737                 adev->gfx.config.max_sh_per_se = 1;
1738                 adev->gfx.config.max_backends_per_se = 1;
1739
1740                 switch (adev->pdev->revision) {
1741                 case 0xc0:
1742                 case 0xc1:
1743                 case 0xc2:
1744                 case 0xc4:
1745                 case 0xc8:
1746                 case 0xc9:
1747                         adev->gfx.config.max_cu_per_sh = 3;
1748                         break;
1749                 case 0xd0:
1750                 case 0xd1:
1751                 case 0xd2:
1752                 default:
1753                         adev->gfx.config.max_cu_per_sh = 2;
1754                         break;
1755                 }
1756
1757                 adev->gfx.config.max_texture_channel_caches = 2;
1758                 adev->gfx.config.max_gprs = 256;
1759                 adev->gfx.config.max_gs_threads = 16;
1760                 adev->gfx.config.max_hw_contexts = 8;
1761
1762                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1763                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1764                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1765                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1766                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1767                 break;
1768         default:
1769                 adev->gfx.config.max_shader_engines = 2;
1770                 adev->gfx.config.max_tile_pipes = 4;
1771                 adev->gfx.config.max_cu_per_sh = 2;
1772                 adev->gfx.config.max_sh_per_se = 1;
1773                 adev->gfx.config.max_backends_per_se = 2;
1774                 adev->gfx.config.max_texture_channel_caches = 4;
1775                 adev->gfx.config.max_gprs = 256;
1776                 adev->gfx.config.max_gs_threads = 32;
1777                 adev->gfx.config.max_hw_contexts = 8;
1778
1779                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1780                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1781                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1782                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1783                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1784                 break;
1785         }
1786
1787         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1788         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1789         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1790
1791         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1792         adev->gfx.config.mem_max_burst_length_bytes = 256;
1793         if (adev->flags & AMD_IS_APU) {
1794                 /* Get memory bank mapping mode. */
1795                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1796                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1797                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1798
1799                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1800                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1801                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1802
1803                 /* Validate settings in case only one DIMM installed. */
1804                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1805                         dimm00_addr_map = 0;
1806                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1807                         dimm01_addr_map = 0;
1808                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1809                         dimm10_addr_map = 0;
1810                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1811                         dimm11_addr_map = 0;
1812
1813                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1814                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1815                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1816                         adev->gfx.config.mem_row_size_in_kb = 2;
1817                 else
1818                         adev->gfx.config.mem_row_size_in_kb = 1;
1819         } else {
1820                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1821                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1822                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1823                         adev->gfx.config.mem_row_size_in_kb = 4;
1824         }
1825
1826         adev->gfx.config.shader_engine_tile_size = 32;
1827         adev->gfx.config.num_gpus = 1;
1828         adev->gfx.config.multi_gpu_tile_size = 64;
1829
1830         /* fix up row size */
1831         switch (adev->gfx.config.mem_row_size_in_kb) {
1832         case 1:
1833         default:
1834                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1835                 break;
1836         case 2:
1837                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1838                 break;
1839         case 4:
1840                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1841                 break;
1842         }
1843         adev->gfx.config.gb_addr_config = gb_addr_config;
1844
1845         return 0;
1846 }
1847
1848 static int gfx_v8_0_sw_init(void *handle)
1849 {
1850         int i, r;
1851         struct amdgpu_ring *ring;
1852         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1853
1854         /* EOP Event */
1855         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1856         if (r)
1857                 return r;
1858
1859         /* Privileged reg */
1860         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1861         if (r)
1862                 return r;
1863
1864         /* Privileged inst */
1865         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1866         if (r)
1867                 return r;
1868
1869         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1870
1871         gfx_v8_0_scratch_init(adev);
1872
1873         r = gfx_v8_0_init_microcode(adev);
1874         if (r) {
1875                 DRM_ERROR("Failed to load gfx firmware!\n");
1876                 return r;
1877         }
1878
1879         r = gfx_v8_0_rlc_init(adev);
1880         if (r) {
1881                 DRM_ERROR("Failed to init rlc BOs!\n");
1882                 return r;
1883         }
1884
1885         r = gfx_v8_0_mec_init(adev);
1886         if (r) {
1887                 DRM_ERROR("Failed to init MEC BOs!\n");
1888                 return r;
1889         }
1890
1891         /* set up the gfx ring */
1892         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1893                 ring = &adev->gfx.gfx_ring[i];
1894                 ring->ring_obj = NULL;
1895                 sprintf(ring->name, "gfx");
1896                 /* no gfx doorbells on iceland */
1897                 if (adev->asic_type != CHIP_TOPAZ) {
1898                         ring->use_doorbell = true;
1899                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1900                 }
1901
1902                 r = amdgpu_ring_init(adev, ring, 1024,
1903                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1904                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1905                                      AMDGPU_RING_TYPE_GFX);
1906                 if (r)
1907                         return r;
1908         }
1909
1910         /* set up the compute queues */
1911         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1912                 unsigned irq_type;
1913
1914                 /* max 32 queues per MEC */
1915                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1916                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1917                         break;
1918                 }
1919                 ring = &adev->gfx.compute_ring[i];
1920                 ring->ring_obj = NULL;
1921                 ring->use_doorbell = true;
1922                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1923                 ring->me = 1; /* first MEC */
1924                 ring->pipe = i / 8;
1925                 ring->queue = i % 8;
1926                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1927                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1928                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1929                 r = amdgpu_ring_init(adev, ring, 1024,
1930                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1931                                      &adev->gfx.eop_irq, irq_type,
1932                                      AMDGPU_RING_TYPE_COMPUTE);
1933                 if (r)
1934                         return r;
1935         }
1936
1937         /* reserve GDS, GWS and OA resource for gfx */
1938         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1939                         PAGE_SIZE, true,
1940                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1941                         NULL, &adev->gds.gds_gfx_bo);
1942         if (r)
1943                 return r;
1944
1945         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1946                 PAGE_SIZE, true,
1947                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1948                 NULL, &adev->gds.gws_gfx_bo);
1949         if (r)
1950                 return r;
1951
1952         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1953                         PAGE_SIZE, true,
1954                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1955                         NULL, &adev->gds.oa_gfx_bo);
1956         if (r)
1957                 return r;
1958
1959         adev->gfx.ce_ram_size = 0x8000;
1960
1961         r = gfx_v8_0_gpu_early_init(adev);
1962         if (r)
1963                 return r;
1964
1965         return 0;
1966 }
1967
1968 static int gfx_v8_0_sw_fini(void *handle)
1969 {
1970         int i;
1971         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1972
1973         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1974         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1975         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1976
1977         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1978                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1979         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1980                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1981
1982         gfx_v8_0_mec_fini(adev);
1983
1984         gfx_v8_0_rlc_fini(adev);
1985
1986         kfree(adev->gfx.rlc.register_list_format);
1987
1988         return 0;
1989 }
1990
1991 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1992 {
1993         uint32_t *modearray, *mod2array;
1994         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1995         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1996         u32 reg_offset;
1997
1998         modearray = adev->gfx.config.tile_mode_array;
1999         mod2array = adev->gfx.config.macrotile_mode_array;
2000
2001         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2002                 modearray[reg_offset] = 0;
2003
2004         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2005                 mod2array[reg_offset] = 0;
2006
2007         switch (adev->asic_type) {
2008         case CHIP_TOPAZ:
2009                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2010                                 PIPE_CONFIG(ADDR_SURF_P2) |
2011                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2013                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2014                                 PIPE_CONFIG(ADDR_SURF_P2) |
2015                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2017                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2018                                 PIPE_CONFIG(ADDR_SURF_P2) |
2019                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2021                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2022                                 PIPE_CONFIG(ADDR_SURF_P2) |
2023                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2024                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2025                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026                                 PIPE_CONFIG(ADDR_SURF_P2) |
2027                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2028                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2029                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030                                 PIPE_CONFIG(ADDR_SURF_P2) |
2031                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2032                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2033                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2034                                 PIPE_CONFIG(ADDR_SURF_P2) |
2035                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2036                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2037                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2038                                 PIPE_CONFIG(ADDR_SURF_P2));
2039                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2040                                 PIPE_CONFIG(ADDR_SURF_P2) |
2041                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2042                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044                                  PIPE_CONFIG(ADDR_SURF_P2) |
2045                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2046                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2047                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2048                                  PIPE_CONFIG(ADDR_SURF_P2) |
2049                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2050                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2051                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052                                  PIPE_CONFIG(ADDR_SURF_P2) |
2053                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2054                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2055                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056                                  PIPE_CONFIG(ADDR_SURF_P2) |
2057                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2058                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2060                                  PIPE_CONFIG(ADDR_SURF_P2) |
2061                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2062                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064                                  PIPE_CONFIG(ADDR_SURF_P2) |
2065                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2066                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2067                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2068                                  PIPE_CONFIG(ADDR_SURF_P2) |
2069                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2070                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2071                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2072                                  PIPE_CONFIG(ADDR_SURF_P2) |
2073                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2074                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2075                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2076                                  PIPE_CONFIG(ADDR_SURF_P2) |
2077                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2078                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2079                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2080                                  PIPE_CONFIG(ADDR_SURF_P2) |
2081                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2082                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2083                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2084                                  PIPE_CONFIG(ADDR_SURF_P2) |
2085                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2086                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2087                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2088                                  PIPE_CONFIG(ADDR_SURF_P2) |
2089                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2091                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2092                                  PIPE_CONFIG(ADDR_SURF_P2) |
2093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2095                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2096                                  PIPE_CONFIG(ADDR_SURF_P2) |
2097                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2098                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2099                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2100                                  PIPE_CONFIG(ADDR_SURF_P2) |
2101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2103                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104                                  PIPE_CONFIG(ADDR_SURF_P2) |
2105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2107                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2108                                  PIPE_CONFIG(ADDR_SURF_P2) |
2109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2111
2112                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2113                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2114                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2115                                 NUM_BANKS(ADDR_SURF_8_BANK));
2116                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2117                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2118                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2119                                 NUM_BANKS(ADDR_SURF_8_BANK));
2120                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2121                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2122                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2123                                 NUM_BANKS(ADDR_SURF_8_BANK));
2124                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2125                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2126                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2127                                 NUM_BANKS(ADDR_SURF_8_BANK));
2128                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2131                                 NUM_BANKS(ADDR_SURF_8_BANK));
2132                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2133                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2134                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2135                                 NUM_BANKS(ADDR_SURF_8_BANK));
2136                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2137                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2138                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2139                                 NUM_BANKS(ADDR_SURF_8_BANK));
2140                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2141                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2142                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2143                                 NUM_BANKS(ADDR_SURF_16_BANK));
2144                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2145                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2146                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2147                                 NUM_BANKS(ADDR_SURF_16_BANK));
2148                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2149                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2150                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2151                                  NUM_BANKS(ADDR_SURF_16_BANK));
2152                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2153                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2154                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2155                                  NUM_BANKS(ADDR_SURF_16_BANK));
2156                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2157                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2158                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2159                                  NUM_BANKS(ADDR_SURF_16_BANK));
2160                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2162                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2163                                  NUM_BANKS(ADDR_SURF_16_BANK));
2164                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2166                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2167                                  NUM_BANKS(ADDR_SURF_8_BANK));
2168
2169                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2170                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2171                             reg_offset != 23)
2172                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2173
2174                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2175                         if (reg_offset != 7)
2176                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2177
2178                 break;
2179         case CHIP_FIJI:
2180                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2184                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2186                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2187                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2188                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2189                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2190                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2191                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2192                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2193                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2194                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2195                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2196                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2198                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2199                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2200                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2201                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2202                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2203                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2204                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2206                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2207                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2208                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2209                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2210                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2211                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2212                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2213                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2214                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2215                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2216                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2217                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2218                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2222                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2223                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2226                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2227                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2230                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2231                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2234                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2235                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2238                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2239                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2242                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2246                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2250                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2251                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2254                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2255                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2258                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2259                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2262                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2263                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2266                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2267                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2270                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2271                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2274                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2275                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2278                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2279                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2282                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2283                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2287                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2294                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2295                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2298                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2299                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2302
2303                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2305                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306                                 NUM_BANKS(ADDR_SURF_8_BANK));
2307                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2309                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310                                 NUM_BANKS(ADDR_SURF_8_BANK));
2311                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2313                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2314                                 NUM_BANKS(ADDR_SURF_8_BANK));
2315                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2316                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2317                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2318                                 NUM_BANKS(ADDR_SURF_8_BANK));
2319                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2320                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2321                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2322                                 NUM_BANKS(ADDR_SURF_8_BANK));
2323                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2325                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2326                                 NUM_BANKS(ADDR_SURF_8_BANK));
2327                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2330                                 NUM_BANKS(ADDR_SURF_8_BANK));
2331                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2334                                 NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2341                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2342                                  NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2346                                  NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2349                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350                                  NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                  NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2358                                  NUM_BANKS(ADDR_SURF_4_BANK));
2359
2360                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2361                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2362
2363                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2364                         if (reg_offset != 7)
2365                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2366
2367                 break;
2368         case CHIP_TONGA:
2369                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2371                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2372                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2373                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2375                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2376                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2377                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2379                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2381                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2383                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2384                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2385                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2387                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2388                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2389                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2394                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2403                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2405                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2406                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2411                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2412                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2415                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2416                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2419                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2420                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2435                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2440                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2443                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2444                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2447                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2448                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2451                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2452                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2455                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2456                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2459                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2460                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2463                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2476                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2479                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2483                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2484                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2487                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2488                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2491
2492                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2495                                 NUM_BANKS(ADDR_SURF_16_BANK));
2496                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499                                 NUM_BANKS(ADDR_SURF_16_BANK));
2500                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2502                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2503                                 NUM_BANKS(ADDR_SURF_16_BANK));
2504                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2506                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2507                                 NUM_BANKS(ADDR_SURF_16_BANK));
2508                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2511                                 NUM_BANKS(ADDR_SURF_16_BANK));
2512                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                                 NUM_BANKS(ADDR_SURF_16_BANK));
2516                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                                 NUM_BANKS(ADDR_SURF_16_BANK));
2520                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523                                 NUM_BANKS(ADDR_SURF_16_BANK));
2524                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2527                                 NUM_BANKS(ADDR_SURF_16_BANK));
2528                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2531                                  NUM_BANKS(ADDR_SURF_16_BANK));
2532                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2535                                  NUM_BANKS(ADDR_SURF_16_BANK));
2536                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                  NUM_BANKS(ADDR_SURF_8_BANK));
2540                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2542                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2543                                  NUM_BANKS(ADDR_SURF_4_BANK));
2544                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2546                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2547                                  NUM_BANKS(ADDR_SURF_4_BANK));
2548
2549                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2550                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2551
2552                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2553                         if (reg_offset != 7)
2554                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2555
2556                 break;
2557         case CHIP_POLARIS11:
2558                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2559                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2560                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2561                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2562                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2565                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2566                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2568                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2569                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2570                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2572                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2573                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2574                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2575                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2576                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2577                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2578                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2579                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2580                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2581                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2582                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2585                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2586                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2587                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2588                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2590                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2591                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2592                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2593                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2594                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2595                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2598                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2599                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2600                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2603                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2604                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2605                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2607                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2608                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2609                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2610                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2611                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2612                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2615                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2621                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2623                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2624                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2627                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2629                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2632                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2633                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2634                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2635                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2636                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2637                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2638                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2639                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2640                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2641                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2642                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2643                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2644                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2645                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2647                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2648                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2649                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2650                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2651                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2652                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2653                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2654                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2655                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2656                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2657                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2658                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2659                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2660                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2661                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2662                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2663                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2664                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2667                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2668                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2669                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2671                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2673                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2675                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2676                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2677                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2678                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2679                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2680
2681                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2683                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2684                                 NUM_BANKS(ADDR_SURF_16_BANK));
2685
2686                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2688                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2689                                 NUM_BANKS(ADDR_SURF_16_BANK));
2690
2691                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2693                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2694                                 NUM_BANKS(ADDR_SURF_16_BANK));
2695
2696                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699                                 NUM_BANKS(ADDR_SURF_16_BANK));
2700
2701                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2703                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2704                                 NUM_BANKS(ADDR_SURF_16_BANK));
2705
2706                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2707                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2708                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2709                                 NUM_BANKS(ADDR_SURF_16_BANK));
2710
2711                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2712                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2713                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2714                                 NUM_BANKS(ADDR_SURF_16_BANK));
2715
2716                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2717                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2718                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2719                                 NUM_BANKS(ADDR_SURF_16_BANK));
2720
2721                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2722                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2724                                 NUM_BANKS(ADDR_SURF_16_BANK));
2725
2726                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729                                 NUM_BANKS(ADDR_SURF_16_BANK));
2730
2731                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2733                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2734                                 NUM_BANKS(ADDR_SURF_16_BANK));
2735
2736                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2738                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2739                                 NUM_BANKS(ADDR_SURF_16_BANK));
2740
2741                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2743                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2744                                 NUM_BANKS(ADDR_SURF_8_BANK));
2745
2746                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2747                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2748                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2749                                 NUM_BANKS(ADDR_SURF_4_BANK));
2750
2751                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2752                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2753
2754                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2755                         if (reg_offset != 7)
2756                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2757
2758                 break;
2759         case CHIP_POLARIS10:
2760                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2762                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2764                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2766                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2768                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2770                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2772                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2774                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2776                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2778                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2780                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2782                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2786                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2788                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2794                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2796                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2797                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2800                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2801                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2806                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2810                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2826                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2830                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2831                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2834                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2835                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2838                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2839                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2842                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2843                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2846                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2847                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2850                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2851                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2854                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2859                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2862                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2863                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2866                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2867                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2870                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2871                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2875                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2877                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2878                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2881                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2882
2883                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2885                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2886                                 NUM_BANKS(ADDR_SURF_16_BANK));
2887
2888                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2890                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2891                                 NUM_BANKS(ADDR_SURF_16_BANK));
2892
2893                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2895                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2896                                 NUM_BANKS(ADDR_SURF_16_BANK));
2897
2898                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2899                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2900                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2901                                 NUM_BANKS(ADDR_SURF_16_BANK));
2902
2903                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2904                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2905                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2906                                 NUM_BANKS(ADDR_SURF_16_BANK));
2907
2908                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2909                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2910                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2911                                 NUM_BANKS(ADDR_SURF_16_BANK));
2912
2913                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2914                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2915                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2916                                 NUM_BANKS(ADDR_SURF_16_BANK));
2917
2918                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2919                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2920                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2921                                 NUM_BANKS(ADDR_SURF_16_BANK));
2922
2923                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2925                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2926                                 NUM_BANKS(ADDR_SURF_16_BANK));
2927
2928                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2929                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2930                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2931                                 NUM_BANKS(ADDR_SURF_16_BANK));
2932
2933                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2934                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2935                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2936                                 NUM_BANKS(ADDR_SURF_16_BANK));
2937
2938                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2941                                 NUM_BANKS(ADDR_SURF_8_BANK));
2942
2943                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2944                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2945                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2946                                 NUM_BANKS(ADDR_SURF_4_BANK));
2947
2948                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2949                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2950                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2951                                 NUM_BANKS(ADDR_SURF_4_BANK));
2952
2953                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2954                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2955
2956                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2957                         if (reg_offset != 7)
2958                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2959
2960                 break;
2961         case CHIP_STONEY:
2962                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P2) |
2964                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2966                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2967                                 PIPE_CONFIG(ADDR_SURF_P2) |
2968                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2970                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2971                                 PIPE_CONFIG(ADDR_SURF_P2) |
2972                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2974                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2975                                 PIPE_CONFIG(ADDR_SURF_P2) |
2976                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2978                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2979                                 PIPE_CONFIG(ADDR_SURF_P2) |
2980                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2982                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2983                                 PIPE_CONFIG(ADDR_SURF_P2) |
2984                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2986                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2987                                 PIPE_CONFIG(ADDR_SURF_P2) |
2988                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2990                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2991                                 PIPE_CONFIG(ADDR_SURF_P2));
2992                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2993                                 PIPE_CONFIG(ADDR_SURF_P2) |
2994                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2995                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2996                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2997                                  PIPE_CONFIG(ADDR_SURF_P2) |
2998                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2999                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3000                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001                                  PIPE_CONFIG(ADDR_SURF_P2) |
3002                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3003                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3004                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005                                  PIPE_CONFIG(ADDR_SURF_P2) |
3006                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3007                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009                                  PIPE_CONFIG(ADDR_SURF_P2) |
3010                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3011                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3013                                  PIPE_CONFIG(ADDR_SURF_P2) |
3014                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3015                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017                                  PIPE_CONFIG(ADDR_SURF_P2) |
3018                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3019                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3020                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3021                                  PIPE_CONFIG(ADDR_SURF_P2) |
3022                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3023                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3024                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3025                                  PIPE_CONFIG(ADDR_SURF_P2) |
3026                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3027                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3028                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3029                                  PIPE_CONFIG(ADDR_SURF_P2) |
3030                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3031                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3032                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3033                                  PIPE_CONFIG(ADDR_SURF_P2) |
3034                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3035                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3036                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3037                                  PIPE_CONFIG(ADDR_SURF_P2) |
3038                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3039                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3040                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3041                                  PIPE_CONFIG(ADDR_SURF_P2) |
3042                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3043                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3044                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3045                                  PIPE_CONFIG(ADDR_SURF_P2) |
3046                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3047                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3048                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3049                                  PIPE_CONFIG(ADDR_SURF_P2) |
3050                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3051                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3052                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3053                                  PIPE_CONFIG(ADDR_SURF_P2) |
3054                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3056                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3057                                  PIPE_CONFIG(ADDR_SURF_P2) |
3058                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3059                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3060                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3061                                  PIPE_CONFIG(ADDR_SURF_P2) |
3062                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3063                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3064
3065                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068                                 NUM_BANKS(ADDR_SURF_8_BANK));
3069                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072                                 NUM_BANKS(ADDR_SURF_8_BANK));
3073                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076                                 NUM_BANKS(ADDR_SURF_8_BANK));
3077                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3079                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3080                                 NUM_BANKS(ADDR_SURF_8_BANK));
3081                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3084                                 NUM_BANKS(ADDR_SURF_8_BANK));
3085                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3088                                 NUM_BANKS(ADDR_SURF_8_BANK));
3089                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3092                                 NUM_BANKS(ADDR_SURF_8_BANK));
3093                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3094                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3095                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3096                                 NUM_BANKS(ADDR_SURF_16_BANK));
3097                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3098                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3099                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3100                                 NUM_BANKS(ADDR_SURF_16_BANK));
3101                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3102                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3103                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3104                                  NUM_BANKS(ADDR_SURF_16_BANK));
3105                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3106                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3107                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3108                                  NUM_BANKS(ADDR_SURF_16_BANK));
3109                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3110                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3111                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3112                                  NUM_BANKS(ADDR_SURF_16_BANK));
3113                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3114                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3115                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3116                                  NUM_BANKS(ADDR_SURF_16_BANK));
3117                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3120                                  NUM_BANKS(ADDR_SURF_8_BANK));
3121
3122                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3123                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3124                             reg_offset != 23)
3125                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3126
3127                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3128                         if (reg_offset != 7)
3129                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3130
3131                 break;
3132         default:
3133                 dev_warn(adev->dev,
3134                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3135                          adev->asic_type);
3136
3137         case CHIP_CARRIZO:
3138                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3139                                 PIPE_CONFIG(ADDR_SURF_P2) |
3140                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3142                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3143                                 PIPE_CONFIG(ADDR_SURF_P2) |
3144                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3145                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3146                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147                                 PIPE_CONFIG(ADDR_SURF_P2) |
3148                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3149                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3150                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3151                                 PIPE_CONFIG(ADDR_SURF_P2) |
3152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3154                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3155                                 PIPE_CONFIG(ADDR_SURF_P2) |
3156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3158                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3159                                 PIPE_CONFIG(ADDR_SURF_P2) |
3160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3162                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3163                                 PIPE_CONFIG(ADDR_SURF_P2) |
3164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3166                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3167                                 PIPE_CONFIG(ADDR_SURF_P2));
3168                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3169                                 PIPE_CONFIG(ADDR_SURF_P2) |
3170                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3171                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3172                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3180                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3184                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3192                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3196                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3200                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3204                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3205                                  PIPE_CONFIG(ADDR_SURF_P2) |
3206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3208                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3209                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3212                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3213                                  PIPE_CONFIG(ADDR_SURF_P2) |
3214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3216                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3217                                  PIPE_CONFIG(ADDR_SURF_P2) |
3218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3220                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3221                                  PIPE_CONFIG(ADDR_SURF_P2) |
3222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3224                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3225                                  PIPE_CONFIG(ADDR_SURF_P2) |
3226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3228                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3229                                  PIPE_CONFIG(ADDR_SURF_P2) |
3230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3232                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233                                  PIPE_CONFIG(ADDR_SURF_P2) |
3234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3236                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3237                                  PIPE_CONFIG(ADDR_SURF_P2) |
3238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3240
3241                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                 NUM_BANKS(ADDR_SURF_8_BANK));
3245                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                 NUM_BANKS(ADDR_SURF_8_BANK));
3249                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3251                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3252                                 NUM_BANKS(ADDR_SURF_8_BANK));
3253                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3256                                 NUM_BANKS(ADDR_SURF_8_BANK));
3257                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260                                 NUM_BANKS(ADDR_SURF_8_BANK));
3261                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3264                                 NUM_BANKS(ADDR_SURF_8_BANK));
3265                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3268                                 NUM_BANKS(ADDR_SURF_8_BANK));
3269                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                 NUM_BANKS(ADDR_SURF_16_BANK));
3273                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3276                                 NUM_BANKS(ADDR_SURF_16_BANK));
3277                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3278                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3279                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3280                                  NUM_BANKS(ADDR_SURF_16_BANK));
3281                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3282                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3283                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3284                                  NUM_BANKS(ADDR_SURF_16_BANK));
3285                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3286                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3287                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3288                                  NUM_BANKS(ADDR_SURF_16_BANK));
3289                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3290                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3291                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3292                                  NUM_BANKS(ADDR_SURF_16_BANK));
3293                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3294                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3295                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3296                                  NUM_BANKS(ADDR_SURF_8_BANK));
3297
3298                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3299                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3300                             reg_offset != 23)
3301                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3302
3303                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3304                         if (reg_offset != 7)
3305                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3306
3307                 break;
3308         }
3309 }
3310
3311 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3312 {
3313         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3314
3315         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3316                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3317                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3318         } else if (se_num == 0xffffffff) {
3319                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3320                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3321         } else if (sh_num == 0xffffffff) {
3322                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3323                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3324         } else {
3325                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3326                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3327         }
3328         WREG32(mmGRBM_GFX_INDEX, data);
3329 }
3330
/*
 * gfx_v8_0_create_bitmask - build a mask with the low @bit_width bits set
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns (2^bit_width - 1) truncated to 32 bits, i.e. 0 for a width of 0
 * and 0xffffffff for any width of 32 or more.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	/*
	 * Widths 32..63 already truncated to all ones in the 64-bit
	 * computation below.  Returning early gives the same result for them
	 * and keeps widths of 64 and above from shifting a 64-bit value by
	 * its full width or more, which is undefined behaviour in C.
	 */
	if (bit_width >= 32)
		return 0xffffffff;

	return (u32)((1ULL << bit_width) - 1);
}
3335
3336 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3337 {
3338         u32 data, mask;
3339
3340         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3341         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3342
3343         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3344         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3345
3346         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3347                                        adev->gfx.config.max_sh_per_se);
3348
3349         return (~data) & mask;
3350 }
3351
3352 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3353 {
3354         int i, j;
3355         u32 data;
3356         u32 active_rbs = 0;
3357         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3358                                         adev->gfx.config.max_sh_per_se;
3359
3360         mutex_lock(&adev->grbm_idx_mutex);
3361         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3362                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3363                         gfx_v8_0_select_se_sh(adev, i, j);
3364                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3365                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3366                                                rb_bitmap_width_per_sh);
3367                 }
3368         }
3369         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3370         mutex_unlock(&adev->grbm_idx_mutex);
3371
3372         adev->gfx.config.backend_enable_mask = active_rbs;
3373         adev->gfx.config.num_rbs = hweight32(active_rbs);
3374 }
3375
3376 /**
3377  * gfx_v8_0_init_compute_vmid - gart enable
3378  *
3379  * @rdev: amdgpu_device pointer
3380  *
3381  * Initialize compute vmid sh_mem registers
3382  *
3383  */
3384 #define DEFAULT_SH_MEM_BASES    (0x6000)
3385 #define FIRST_COMPUTE_VMID      (8)
3386 #define LAST_COMPUTE_VMID       (16)
3387 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3388 {
3389         int i;
3390         uint32_t sh_mem_config;
3391         uint32_t sh_mem_bases;
3392
3393         /*
3394          * Configure apertures:
3395          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3396          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3397          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3398          */
3399         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3400
3401         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3402                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3403                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3404                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3405                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3406                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3407
3408         mutex_lock(&adev->srbm_mutex);
3409         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3410                 vi_srbm_select(adev, 0, 0, 0, i);
3411                 /* CP and shaders */
3412                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3413                 WREG32(mmSH_MEM_APE1_BASE, 1);
3414                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3415                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3416         }
3417         vi_srbm_select(adev, 0, 0, 0, 0);
3418         mutex_unlock(&adev->srbm_mutex);
3419 }
3420
3421 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3422 {
3423         u32 tmp;
3424         int i;
3425
3426         tmp = RREG32(mmGRBM_CNTL);
3427         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3428         WREG32(mmGRBM_CNTL, tmp);
3429
3430         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3431         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3432         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3433
3434         gfx_v8_0_tiling_mode_table_init(adev);
3435
3436         gfx_v8_0_setup_rb(adev);
3437         gfx_v8_0_get_cu_info(adev);
3438
3439         /* XXX SH_MEM regs */
3440         /* where to put LDS, scratch, GPUVM in FSA64 space */
3441         mutex_lock(&adev->srbm_mutex);
3442         for (i = 0; i < 16; i++) {
3443                 vi_srbm_select(adev, 0, 0, 0, i);
3444                 /* CP and shaders */
3445                 if (i == 0) {
3446                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3447                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3448                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3449                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3450                         WREG32(mmSH_MEM_CONFIG, tmp);
3451                 } else {
3452                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3453                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3454                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3455                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3456                         WREG32(mmSH_MEM_CONFIG, tmp);
3457                 }
3458
3459                 WREG32(mmSH_MEM_APE1_BASE, 1);
3460                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3461                 WREG32(mmSH_MEM_BASES, 0);
3462         }
3463         vi_srbm_select(adev, 0, 0, 0, 0);
3464         mutex_unlock(&adev->srbm_mutex);
3465
3466         gfx_v8_0_init_compute_vmid(adev);
3467
3468         mutex_lock(&adev->grbm_idx_mutex);
3469         /*
3470          * making sure that the following register writes will be broadcasted
3471          * to all the shaders
3472          */
3473         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3474
3475         WREG32(mmPA_SC_FIFO_SIZE,
3476                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3477                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3478                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3479                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3480                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3481                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3482                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3483                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3484         mutex_unlock(&adev->grbm_idx_mutex);
3485
3486 }
3487
3488 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3489 {
3490         u32 i, j, k;
3491         u32 mask;
3492
3493         mutex_lock(&adev->grbm_idx_mutex);
3494         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3495                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3496                         gfx_v8_0_select_se_sh(adev, i, j);
3497                         for (k = 0; k < adev->usec_timeout; k++) {
3498                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3499                                         break;
3500                                 udelay(1);
3501                         }
3502                 }
3503         }
3504         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3505         mutex_unlock(&adev->grbm_idx_mutex);
3506
3507         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3508                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3509                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3510                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3511         for (k = 0; k < adev->usec_timeout; k++) {
3512                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3513                         break;
3514                 udelay(1);
3515         }
3516 }
3517
3518 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3519                                                bool enable)
3520 {
3521         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3522
3523         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3524         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3525         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3526         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3527
3528         WREG32(mmCP_INT_CNTL_RING0, tmp);
3529 }
3530
3531 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3532 {
3533         /* csib */
3534         WREG32(mmRLC_CSIB_ADDR_HI,
3535                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3536         WREG32(mmRLC_CSIB_ADDR_LO,
3537                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3538         WREG32(mmRLC_CSIB_LENGTH,
3539                         adev->gfx.rlc.clear_state_size);
3540 }
3541
3542 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3543                                 int ind_offset,
3544                                 int list_size,
3545                                 int *unique_indices,
3546                                 int *indices_count,
3547                                 int max_indices,
3548                                 int *ind_start_offsets,
3549                                 int *offset_count,
3550                                 int max_offset)
3551 {
3552         int indices;
3553         bool new_entry = true;
3554
3555         for (; ind_offset < list_size; ind_offset++) {
3556
3557                 if (new_entry) {
3558                         new_entry = false;
3559                         ind_start_offsets[*offset_count] = ind_offset;
3560                         *offset_count = *offset_count + 1;
3561                         BUG_ON(*offset_count >= max_offset);
3562                 }
3563
3564                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3565                         new_entry = true;
3566                         continue;
3567                 }
3568
3569                 ind_offset += 2;
3570
3571                 /* look for the matching indice */
3572                 for (indices = 0;
3573                         indices < *indices_count;
3574                         indices++) {
3575                         if (unique_indices[indices] ==
3576                                 register_list_format[ind_offset])
3577                                 break;
3578                 }
3579
3580                 if (indices >= *indices_count) {
3581                         unique_indices[*indices_count] =
3582                                 register_list_format[ind_offset];
3583                         indices = *indices_count;
3584                         *indices_count = *indices_count + 1;
3585                         BUG_ON(*indices_count >= max_indices);
3586                 }
3587
3588                 register_list_format[ind_offset] = indices;
3589         }
3590 }
3591
3592 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3593 {
3594         int i, temp, data;
3595         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3596         int indices_count = 0;
3597         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3598         int offset_count = 0;
3599
3600         int list_size;
3601         unsigned int *register_list_format =
3602                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3603         if (register_list_format == NULL)
3604                 return -ENOMEM;
3605         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3606                         adev->gfx.rlc.reg_list_format_size_bytes);
3607
3608         gfx_v8_0_parse_ind_reg_list(register_list_format,
3609                                 RLC_FormatDirectRegListLength,
3610                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3611                                 unique_indices,
3612                                 &indices_count,
3613                                 sizeof(unique_indices) / sizeof(int),
3614                                 indirect_start_offsets,
3615                                 &offset_count,
3616                                 sizeof(indirect_start_offsets)/sizeof(int));
3617
3618         /* save and restore list */
3619         temp = RREG32(mmRLC_SRM_CNTL);
3620         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3621         WREG32(mmRLC_SRM_CNTL, temp);
3622
3623         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3624         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3625                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3626
3627         /* indirect list */
3628         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3629         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3630                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3631
3632         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3633         list_size = list_size >> 1;
3634         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3635         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3636
3637         /* starting offsets starts */
3638         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3639                 adev->gfx.rlc.starting_offsets_start);
3640         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3641                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3642                                 indirect_start_offsets[i]);
3643
3644         /* unique indices */
3645         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3646         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3647         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3648                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3649                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3650         }
3651         kfree(register_list_format);
3652
3653         return 0;
3654 }
3655
3656 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3657 {
3658         uint32_t data;
3659
3660         data = RREG32(mmRLC_SRM_CNTL);
3661         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3662         WREG32(mmRLC_SRM_CNTL, data);
3663 }
3664
3665 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3666 {
3667         uint32_t data;
3668
3669         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3670                         AMD_PG_SUPPORT_GFX_SMG |
3671                         AMD_PG_SUPPORT_GFX_DMG)) {
3672                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3673                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3674                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3675                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3676
3677                 data = 0;
3678                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3679                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3680                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3681                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3682                 WREG32(mmRLC_PG_DELAY, data);
3683
3684                 data = RREG32(mmRLC_PG_DELAY_2);
3685                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3686                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3687                 WREG32(mmRLC_PG_DELAY_2, data);
3688
3689                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3690                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3691                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3692                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3693         }
3694 }
3695
3696 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3697 {
3698         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3699                               AMD_PG_SUPPORT_GFX_SMG |
3700                               AMD_PG_SUPPORT_GFX_DMG |
3701                               AMD_PG_SUPPORT_CP |
3702                               AMD_PG_SUPPORT_GDS |
3703                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3704                 gfx_v8_0_init_csb(adev);
3705                 gfx_v8_0_init_save_restore_list(adev);
3706                 gfx_v8_0_enable_save_restore_machine(adev);
3707
3708                 if (adev->asic_type == CHIP_POLARIS11)
3709                         polaris11_init_power_gating(adev);
3710         }
3711 }
3712
3713 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3714 {
3715         u32 tmp = RREG32(mmRLC_CNTL);
3716
3717         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3718         WREG32(mmRLC_CNTL, tmp);
3719
3720         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3721
3722         gfx_v8_0_wait_for_rlc_serdes(adev);
3723 }
3724
3725 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3726 {
3727         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3728
3729         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3730         WREG32(mmGRBM_SOFT_RESET, tmp);
3731         udelay(50);
3732         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3733         WREG32(mmGRBM_SOFT_RESET, tmp);
3734         udelay(50);
3735 }
3736
3737 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3738 {
3739         u32 tmp = RREG32(mmRLC_CNTL);
3740
3741         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3742         WREG32(mmRLC_CNTL, tmp);
3743
3744         /* carrizo do enable cp interrupt after cp inited */
3745         if (!(adev->flags & AMD_IS_APU))
3746                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3747
3748         udelay(50);
3749 }
3750
3751 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3752 {
3753         const struct rlc_firmware_header_v2_0 *hdr;
3754         const __le32 *fw_data;
3755         unsigned i, fw_size;
3756
3757         if (!adev->gfx.rlc_fw)
3758                 return -EINVAL;
3759
3760         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3761         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3762
3763         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3764                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3765         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3766
3767         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3768         for (i = 0; i < fw_size; i++)
3769                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3770         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3771
3772         return 0;
3773 }
3774
3775 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3776 {
3777         int r;
3778
3779         gfx_v8_0_rlc_stop(adev);
3780
3781         /* disable CG */
3782         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3783         if (adev->asic_type == CHIP_POLARIS11 ||
3784                 adev->asic_type == CHIP_POLARIS10)
3785                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3786
3787         /* disable PG */
3788         WREG32(mmRLC_PG_CNTL, 0);
3789
3790         gfx_v8_0_rlc_reset(adev);
3791
3792         gfx_v8_0_init_pg(adev);
3793
3794         if (!adev->pp_enabled) {
3795                 if (!adev->firmware.smu_load) {
3796                         /* legacy rlc firmware loading */
3797                         r = gfx_v8_0_rlc_load_microcode(adev);
3798                         if (r)
3799                                 return r;
3800                 } else {
3801                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3802                                                         AMDGPU_UCODE_ID_RLC_G);
3803                         if (r)
3804                                 return -EINVAL;
3805                 }
3806         }
3807
3808         gfx_v8_0_rlc_start(adev);
3809
3810         return 0;
3811 }
3812
3813 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3814 {
3815         int i;
3816         u32 tmp = RREG32(mmCP_ME_CNTL);
3817
3818         if (enable) {
3819                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3820                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3821                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3822         } else {
3823                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3824                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3825                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3826                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3827                         adev->gfx.gfx_ring[i].ready = false;
3828         }
3829         WREG32(mmCP_ME_CNTL, tmp);
3830         udelay(50);
3831 }
3832
3833 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3834 {
3835         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3836         const struct gfx_firmware_header_v1_0 *ce_hdr;
3837         const struct gfx_firmware_header_v1_0 *me_hdr;
3838         const __le32 *fw_data;
3839         unsigned i, fw_size;
3840
3841         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3842                 return -EINVAL;
3843
3844         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3845                 adev->gfx.pfp_fw->data;
3846         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3847                 adev->gfx.ce_fw->data;
3848         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3849                 adev->gfx.me_fw->data;
3850
3851         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3852         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3853         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3854
3855         gfx_v8_0_cp_gfx_enable(adev, false);
3856
3857         /* PFP */
3858         fw_data = (const __le32 *)
3859                 (adev->gfx.pfp_fw->data +
3860                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3861         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3862         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3863         for (i = 0; i < fw_size; i++)
3864                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3865         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3866
3867         /* CE */
3868         fw_data = (const __le32 *)
3869                 (adev->gfx.ce_fw->data +
3870                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3871         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3872         WREG32(mmCP_CE_UCODE_ADDR, 0);
3873         for (i = 0; i < fw_size; i++)
3874                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3875         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3876
3877         /* ME */
3878         fw_data = (const __le32 *)
3879                 (adev->gfx.me_fw->data +
3880                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3881         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3882         WREG32(mmCP_ME_RAM_WADDR, 0);
3883         for (i = 0; i < fw_size; i++)
3884                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3885         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3886
3887         return 0;
3888 }
3889
3890 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3891 {
3892         u32 count = 0;
3893         const struct cs_section_def *sect = NULL;
3894         const struct cs_extent_def *ext = NULL;
3895
3896         /* begin clear state */
3897         count += 2;
3898         /* context control state */
3899         count += 3;
3900
3901         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3902                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3903                         if (sect->id == SECT_CONTEXT)
3904                                 count += 2 + ext->reg_count;
3905                         else
3906                                 return 0;
3907                 }
3908         }
3909         /* pa_sc_raster_config/pa_sc_raster_config1 */
3910         count += 4;
3911         /* end clear state */
3912         count += 2;
3913         /* clear state */
3914         count += 2;
3915
3916         return count;
3917 }
3918
3919 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3920 {
3921         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3922         const struct cs_section_def *sect = NULL;
3923         const struct cs_extent_def *ext = NULL;
3924         int r, i;
3925
3926         /* init the CP */
3927         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3928         WREG32(mmCP_ENDIAN_SWAP, 0);
3929         WREG32(mmCP_DEVICE_ID, 1);
3930
3931         gfx_v8_0_cp_gfx_enable(adev, true);
3932
3933         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3934         if (r) {
3935                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3936                 return r;
3937         }
3938
3939         /* clear state buffer */
3940         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3941         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3942
3943         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3944         amdgpu_ring_write(ring, 0x80000000);
3945         amdgpu_ring_write(ring, 0x80000000);
3946
3947         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3948                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3949                         if (sect->id == SECT_CONTEXT) {
3950                                 amdgpu_ring_write(ring,
3951                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3952                                                ext->reg_count));
3953                                 amdgpu_ring_write(ring,
3954                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3955                                 for (i = 0; i < ext->reg_count; i++)
3956                                         amdgpu_ring_write(ring, ext->extent[i]);
3957                         }
3958                 }
3959         }
3960
3961         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3962         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3963         switch (adev->asic_type) {
3964         case CHIP_TONGA:
3965         case CHIP_POLARIS10:
3966                 amdgpu_ring_write(ring, 0x16000012);
3967                 amdgpu_ring_write(ring, 0x0000002A);
3968                 break;
3969         case CHIP_POLARIS11:
3970                 amdgpu_ring_write(ring, 0x16000012);
3971                 amdgpu_ring_write(ring, 0x00000000);
3972                 break;
3973         case CHIP_FIJI:
3974                 amdgpu_ring_write(ring, 0x3a00161a);
3975                 amdgpu_ring_write(ring, 0x0000002e);
3976                 break;
3977         case CHIP_TOPAZ:
3978         case CHIP_CARRIZO:
3979                 amdgpu_ring_write(ring, 0x00000002);
3980                 amdgpu_ring_write(ring, 0x00000000);
3981                 break;
3982         case CHIP_STONEY:
3983                 amdgpu_ring_write(ring, 0x00000000);
3984                 amdgpu_ring_write(ring, 0x00000000);
3985                 break;
3986         default:
3987                 BUG();
3988         }
3989
3990         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3991         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3992
3993         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3994         amdgpu_ring_write(ring, 0);
3995
3996         /* init the CE partitions */
3997         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3998         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3999         amdgpu_ring_write(ring, 0x8000);
4000         amdgpu_ring_write(ring, 0x8000);
4001
4002         amdgpu_ring_commit(ring);
4003
4004         return 0;
4005 }
4006
4007 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4008 {
4009         struct amdgpu_ring *ring;
4010         u32 tmp;
4011         u32 rb_bufsz;
4012         u64 rb_addr, rptr_addr;
4013         int r;
4014
4015         /* Set the write pointer delay */
4016         WREG32(mmCP_RB_WPTR_DELAY, 0);
4017
4018         /* set the RB to use vmid 0 */
4019         WREG32(mmCP_RB_VMID, 0);
4020
4021         /* Set ring buffer size */
4022         ring = &adev->gfx.gfx_ring[0];
4023         rb_bufsz = order_base_2(ring->ring_size / 8);
4024         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4025         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4026         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4027         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4028 #ifdef __BIG_ENDIAN
4029         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4030 #endif
4031         WREG32(mmCP_RB0_CNTL, tmp);
4032
4033         /* Initialize the ring buffer's read and write pointers */
4034         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4035         ring->wptr = 0;
4036         WREG32(mmCP_RB0_WPTR, ring->wptr);
4037
4038         /* set the wb address wether it's enabled or not */
4039         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4040         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4041         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4042
4043         mdelay(1);
4044         WREG32(mmCP_RB0_CNTL, tmp);
4045
4046         rb_addr = ring->gpu_addr >> 8;
4047         WREG32(mmCP_RB0_BASE, rb_addr);
4048         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4049
4050         /* no gfx doorbells on iceland */
4051         if (adev->asic_type != CHIP_TOPAZ) {
4052                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4053                 if (ring->use_doorbell) {
4054                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4055                                             DOORBELL_OFFSET, ring->doorbell_index);
4056                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4057                                             DOORBELL_HIT, 0);
4058                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4059                                             DOORBELL_EN, 1);
4060                 } else {
4061                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4062                                             DOORBELL_EN, 0);
4063                 }
4064                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4065
4066                 if (adev->asic_type == CHIP_TONGA) {
4067                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4068                                             DOORBELL_RANGE_LOWER,
4069                                             AMDGPU_DOORBELL_GFX_RING0);
4070                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4071
4072                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4073                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4074                 }
4075
4076         }
4077
4078         /* start the ring */
4079         gfx_v8_0_cp_gfx_start(adev);
4080         ring->ready = true;
4081         r = amdgpu_ring_test_ring(ring);
4082         if (r) {
4083                 ring->ready = false;
4084                 return r;
4085         }
4086
4087         return 0;
4088 }
4089
4090 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4091 {
4092         int i;
4093
4094         if (enable) {
4095                 WREG32(mmCP_MEC_CNTL, 0);
4096         } else {
4097                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4098                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4099                         adev->gfx.compute_ring[i].ready = false;
4100         }
4101         udelay(50);
4102 }
4103
4104 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4105 {
4106         const struct gfx_firmware_header_v1_0 *mec_hdr;
4107         const __le32 *fw_data;
4108         unsigned i, fw_size;
4109
4110         if (!adev->gfx.mec_fw)
4111                 return -EINVAL;
4112
4113         gfx_v8_0_cp_compute_enable(adev, false);
4114
4115         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4116         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4117
4118         fw_data = (const __le32 *)
4119                 (adev->gfx.mec_fw->data +
4120                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4121         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4122
4123         /* MEC1 */
4124         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4125         for (i = 0; i < fw_size; i++)
4126                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4127         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4128
4129         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4130         if (adev->gfx.mec2_fw) {
4131                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4132
4133                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4134                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4135
4136                 fw_data = (const __le32 *)
4137                         (adev->gfx.mec2_fw->data +
4138                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4139                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4140
4141                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4142                 for (i = 0; i < fw_size; i++)
4143                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4144                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4145         }
4146
4147         return 0;
4148 }
4149
/*
 * Layout of the MQD that gfx_v8_0_cp_compute_resume() fills in for each
 * compute ring.
 *
 * Every named member is one dword; its "ordinal" is its dword index from the
 * start of the structure.  Members are grouped below by ordinal range, and
 * the order of the declarators within a line is the order in memory.
 */
struct vi_mqd {
	/* ordinals 0-1 */
	uint32_t header;
	uint32_t compute_dispatch_initiator;

	/* ordinals 2-10 */
	uint32_t compute_dim_x, compute_dim_y, compute_dim_z;
	uint32_t compute_start_x, compute_start_y, compute_start_z;
	uint32_t compute_num_thread_x, compute_num_thread_y, compute_num_thread_z;

	/* ordinals 11-12 */
	uint32_t compute_pipelinestat_enable;
	uint32_t compute_perfcount_enable;

	/* ordinals 13-18 */
	uint32_t compute_pgm_lo, compute_pgm_hi;
	uint32_t compute_tba_lo, compute_tba_hi;
	uint32_t compute_tma_lo, compute_tma_hi;

	/* ordinals 19-27 */
	uint32_t compute_pgm_rsrc1, compute_pgm_rsrc2;
	uint32_t compute_vmid;
	uint32_t compute_resource_limits;
	uint32_t compute_static_thread_mgmt_se0, compute_static_thread_mgmt_se1;
	uint32_t compute_tmpring_size;
	uint32_t compute_static_thread_mgmt_se2, compute_static_thread_mgmt_se3;

	/* ordinals 28-38 */
	uint32_t compute_restart_x, compute_restart_y, compute_restart_z;
	uint32_t compute_thread_trace_enable;
	uint32_t compute_misc_reserved;
	uint32_t compute_dispatch_id;
	uint32_t compute_threadgroup_id;
	uint32_t compute_relaunch;
	uint32_t compute_wave_restore_addr_lo, compute_wave_restore_addr_hi;
	uint32_t compute_wave_restore_control;

	/* ordinals 39-64 */
	uint32_t reserved9, reserved10, reserved11, reserved12, reserved13;
	uint32_t reserved14, reserved15, reserved16, reserved17, reserved18;
	uint32_t reserved19, reserved20, reserved21, reserved22, reserved23;
	uint32_t reserved24, reserved25, reserved26, reserved27, reserved28;
	uint32_t reserved29, reserved30, reserved31, reserved32, reserved33;
	uint32_t reserved34;

	/* ordinals 65-80 */
	uint32_t compute_user_data_0, compute_user_data_1;
	uint32_t compute_user_data_2, compute_user_data_3;
	uint32_t compute_user_data_4, compute_user_data_5;
	uint32_t compute_user_data_6, compute_user_data_7;
	uint32_t compute_user_data_8, compute_user_data_9;
	uint32_t compute_user_data_10, compute_user_data_11;
	uint32_t compute_user_data_12, compute_user_data_13;
	uint32_t compute_user_data_14, compute_user_data_15;

	/* ordinals 81-85 */
	uint32_t cp_compute_csinvoc_count_lo, cp_compute_csinvoc_count_hi;
	uint32_t reserved35, reserved36, reserved37;

	/* ordinals 86-97 */
	uint32_t cp_mqd_query_time_lo, cp_mqd_query_time_hi;
	uint32_t cp_mqd_connect_start_time_lo, cp_mqd_connect_start_time_hi;
	uint32_t cp_mqd_connect_end_time_lo, cp_mqd_connect_end_time_hi;
	uint32_t cp_mqd_connect_end_wf_count;
	uint32_t cp_mqd_connect_end_pq_rptr;
	uint32_t cp_mqd_connect_end_pq_wptr;
	uint32_t cp_mqd_connect_end_ib_rptr;
	uint32_t reserved38, reserved39;

	/* ordinals 98-107 */
	uint32_t cp_mqd_save_start_time_lo, cp_mqd_save_start_time_hi;
	uint32_t cp_mqd_save_end_time_lo, cp_mqd_save_end_time_hi;
	uint32_t cp_mqd_restore_start_time_lo, cp_mqd_restore_start_time_hi;
	uint32_t cp_mqd_restore_end_time_lo, cp_mqd_restore_end_time_hi;
	uint32_t reserved40, reserved41;

	/* ordinals 108-113 */
	uint32_t gds_cs_ctxsw_cnt0, gds_cs_ctxsw_cnt1;
	uint32_t gds_cs_ctxsw_cnt2, gds_cs_ctxsw_cnt3;
	uint32_t reserved42, reserved43;

	/* ordinals 114-127 */
	uint32_t cp_pq_exe_status_lo, cp_pq_exe_status_hi;
	uint32_t cp_packet_id_lo, cp_packet_id_hi;
	uint32_t cp_packet_exe_status_lo, cp_packet_exe_status_hi;
	uint32_t gds_save_base_addr_lo, gds_save_base_addr_hi;
	uint32_t gds_save_mask_lo, gds_save_mask_hi;
	uint32_t ctx_save_base_addr_lo, ctx_save_base_addr_hi;
	uint32_t reserved44, reserved45;

	/* ordinals 128-135 */
	uint32_t cp_mqd_base_addr_lo, cp_mqd_base_addr_hi;
	uint32_t cp_hqd_active;
	uint32_t cp_hqd_vmid;
	uint32_t cp_hqd_persistent_state;
	uint32_t cp_hqd_pipe_priority;
	uint32_t cp_hqd_queue_priority;
	uint32_t cp_hqd_quantum;

	/* ordinals 136-145 */
	uint32_t cp_hqd_pq_base_lo, cp_hqd_pq_base_hi;
	uint32_t cp_hqd_pq_rptr;
	uint32_t cp_hqd_pq_rptr_report_addr_lo, cp_hqd_pq_rptr_report_addr_hi;
	uint32_t cp_hqd_pq_wptr_poll_addr, cp_hqd_pq_wptr_poll_addr_hi;
	uint32_t cp_hqd_pq_doorbell_control;
	uint32_t cp_hqd_pq_wptr;
	uint32_t cp_hqd_pq_control;

	/* ordinals 146-155 */
	uint32_t cp_hqd_ib_base_addr_lo, cp_hqd_ib_base_addr_hi;
	uint32_t cp_hqd_ib_rptr;
	uint32_t cp_hqd_ib_control;
	uint32_t cp_hqd_iq_timer;
	uint32_t cp_hqd_iq_rptr;
	uint32_t cp_hqd_dequeue_request;
	uint32_t cp_hqd_dma_offload;
	uint32_t cp_hqd_sema_cmd;
	uint32_t cp_hqd_msg_type;

	/* ordinals 156-164 */
	uint32_t cp_hqd_atomic0_preop_lo, cp_hqd_atomic0_preop_hi;
	uint32_t cp_hqd_atomic1_preop_lo, cp_hqd_atomic1_preop_hi;
	uint32_t cp_hqd_hq_status0;
	uint32_t cp_hqd_hq_control0;
	uint32_t cp_mqd_control;
	uint32_t cp_hqd_hq_status1;
	uint32_t cp_hqd_hq_control1;

	/* ordinals 165-170 */
	uint32_t cp_hqd_eop_base_addr_lo, cp_hqd_eop_base_addr_hi;
	uint32_t cp_hqd_eop_control;
	uint32_t cp_hqd_eop_rptr;
	uint32_t cp_hqd_eop_wptr;
	uint32_t cp_hqd_eop_done_events;

	/* ordinals 171-181 */
	uint32_t cp_hqd_ctx_save_base_addr_lo, cp_hqd_ctx_save_base_addr_hi;
	uint32_t cp_hqd_ctx_save_control;
	uint32_t cp_hqd_cntl_stack_offset;
	uint32_t cp_hqd_cntl_stack_size;
	uint32_t cp_hqd_wg_state_offset;
	uint32_t cp_hqd_ctx_save_size;
	uint32_t cp_hqd_gds_resource_state;
	uint32_t cp_hqd_error;
	uint32_t cp_hqd_eop_wptr_mem;
	uint32_t cp_hqd_eop_dones;

	/* ordinals 182-191 */
	uint32_t reserved46, reserved47, reserved48, reserved49, reserved50;
	uint32_t reserved51, reserved52, reserved53, reserved54, reserved55;

	/* ordinals 192-224 */
	uint32_t iqtimer_pkt_header;
	uint32_t iqtimer_pkt_dw0, iqtimer_pkt_dw1, iqtimer_pkt_dw2, iqtimer_pkt_dw3;
	uint32_t iqtimer_pkt_dw4, iqtimer_pkt_dw5, iqtimer_pkt_dw6, iqtimer_pkt_dw7;
	uint32_t iqtimer_pkt_dw8, iqtimer_pkt_dw9, iqtimer_pkt_dw10, iqtimer_pkt_dw11;
	uint32_t iqtimer_pkt_dw12, iqtimer_pkt_dw13, iqtimer_pkt_dw14, iqtimer_pkt_dw15;
	uint32_t iqtimer_pkt_dw16, iqtimer_pkt_dw17, iqtimer_pkt_dw18, iqtimer_pkt_dw19;
	uint32_t iqtimer_pkt_dw20, iqtimer_pkt_dw21, iqtimer_pkt_dw22, iqtimer_pkt_dw23;
	uint32_t iqtimer_pkt_dw24, iqtimer_pkt_dw25, iqtimer_pkt_dw26, iqtimer_pkt_dw27;
	uint32_t iqtimer_pkt_dw28, iqtimer_pkt_dw29, iqtimer_pkt_dw30, iqtimer_pkt_dw31;

	/* ordinals 225-227 */
	uint32_t reserved56, reserved57, reserved58;

	/* ordinals 228-235 */
	uint32_t set_resources_header;
	uint32_t set_resources_dw1, set_resources_dw2, set_resources_dw3;
	uint32_t set_resources_dw4, set_resources_dw5, set_resources_dw6;
	uint32_t set_resources_dw7;

	/* ordinals 236-255 */
	uint32_t reserved59, reserved60, reserved61, reserved62, reserved63;
	uint32_t reserved64, reserved65, reserved66, reserved67, reserved68;
	uint32_t reserved69, reserved70, reserved71, reserved72, reserved73;
	uint32_t reserved74, reserved75, reserved76, reserved77, reserved78;

	/* Reserve 256 dword buffer used by ucode */
	uint32_t reserved_t[256];
};
4410
4411 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4412 {
4413         int i, r;
4414
4415         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4416                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4417
4418                 if (ring->mqd_obj) {
4419                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4420                         if (unlikely(r != 0))
4421                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4422
4423                         amdgpu_bo_unpin(ring->mqd_obj);
4424                         amdgpu_bo_unreserve(ring->mqd_obj);
4425
4426                         amdgpu_bo_unref(&ring->mqd_obj);
4427                         ring->mqd_obj = NULL;
4428                 }
4429         }
4430 }
4431
4432 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4433 {
4434         int r, i, j;
4435         u32 tmp;
4436         bool use_doorbell = true;
4437         u64 hqd_gpu_addr;
4438         u64 mqd_gpu_addr;
4439         u64 eop_gpu_addr;
4440         u64 wb_gpu_addr;
4441         u32 *buf;
4442         struct vi_mqd *mqd;
4443
4444         /* init the pipes */
4445         mutex_lock(&adev->srbm_mutex);
4446         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4447                 int me = (i < 4) ? 1 : 2;
4448                 int pipe = (i < 4) ? i : (i - 4);
4449
4450                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4451                 eop_gpu_addr >>= 8;
4452
4453                 vi_srbm_select(adev, me, pipe, 0, 0);
4454
4455                 /* write the EOP addr */
4456                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4457                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4458
4459                 /* set the VMID assigned */
4460                 WREG32(mmCP_HQD_VMID, 0);
4461
4462                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4463                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4464                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4465                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4466                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4467         }
4468         vi_srbm_select(adev, 0, 0, 0, 0);
4469         mutex_unlock(&adev->srbm_mutex);
4470
4471         /* init the queues.  Just two for now. */
4472         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4473                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4474
4475                 if (ring->mqd_obj == NULL) {
4476                         r = amdgpu_bo_create(adev,
4477                                              sizeof(struct vi_mqd),
4478                                              PAGE_SIZE, true,
4479                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4480                                              NULL, &ring->mqd_obj);
4481                         if (r) {
4482                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4483                                 return r;
4484                         }
4485                 }
4486
4487                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4488                 if (unlikely(r != 0)) {
4489                         gfx_v8_0_cp_compute_fini(adev);
4490                         return r;
4491                 }
4492                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4493                                   &mqd_gpu_addr);
4494                 if (r) {
4495                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4496                         gfx_v8_0_cp_compute_fini(adev);
4497                         return r;
4498                 }
4499                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4500                 if (r) {
4501                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4502                         gfx_v8_0_cp_compute_fini(adev);
4503                         return r;
4504                 }
4505
4506                 /* init the mqd struct */
4507                 memset(buf, 0, sizeof(struct vi_mqd));
4508
4509                 mqd = (struct vi_mqd *)buf;
4510                 mqd->header = 0xC0310800;
4511                 mqd->compute_pipelinestat_enable = 0x00000001;
4512                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4513                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4514                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4515                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4516                 mqd->compute_misc_reserved = 0x00000003;
4517
4518                 mutex_lock(&adev->srbm_mutex);
4519                 vi_srbm_select(adev, ring->me,
4520                                ring->pipe,
4521                                ring->queue, 0);
4522
4523                 /* disable wptr polling */
4524                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4525                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4526                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4527
4528                 mqd->cp_hqd_eop_base_addr_lo =
4529                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4530                 mqd->cp_hqd_eop_base_addr_hi =
4531                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4532
4533                 /* enable doorbell? */
4534                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4535                 if (use_doorbell) {
4536                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4537                 } else {
4538                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4539                 }
4540                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4541                 mqd->cp_hqd_pq_doorbell_control = tmp;
4542
4543                 /* disable the queue if it's active */
4544                 mqd->cp_hqd_dequeue_request = 0;
4545                 mqd->cp_hqd_pq_rptr = 0;
4546                 mqd->cp_hqd_pq_wptr= 0;
4547                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4548                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4549                         for (j = 0; j < adev->usec_timeout; j++) {
4550                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4551                                         break;
4552                                 udelay(1);
4553                         }
4554                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4555                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4556                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4557                 }
4558
4559                 /* set the pointer to the MQD */
4560                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4561                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4562                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4563                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4564
4565                 /* set MQD vmid to 0 */
4566                 tmp = RREG32(mmCP_MQD_CONTROL);
4567                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4568                 WREG32(mmCP_MQD_CONTROL, tmp);
4569                 mqd->cp_mqd_control = tmp;
4570
4571                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4572                 hqd_gpu_addr = ring->gpu_addr >> 8;
4573                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4574                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4575                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4576                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4577
4578                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4579                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4580                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4581                                     (order_base_2(ring->ring_size / 4) - 1));
4582                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4583                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4584 #ifdef __BIG_ENDIAN
4585                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4586 #endif
4587                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4588                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4589                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4590                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4591                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4592                 mqd->cp_hqd_pq_control = tmp;
4593
4594                 /* set the wb address wether it's enabled or not */
4595                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4596                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4597                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4598                         upper_32_bits(wb_gpu_addr) & 0xffff;
4599                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4600                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4601                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4602                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4603
4604                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4605                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4606                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4607                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4608                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4609                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4610                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4611
4612                 /* enable the doorbell if requested */
4613                 if (use_doorbell) {
4614                         if ((adev->asic_type == CHIP_CARRIZO) ||
4615                             (adev->asic_type == CHIP_FIJI) ||
4616                             (adev->asic_type == CHIP_STONEY) ||
4617                             (adev->asic_type == CHIP_POLARIS11) ||
4618                             (adev->asic_type == CHIP_POLARIS10)) {
4619                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4620                                        AMDGPU_DOORBELL_KIQ << 2);
4621                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4622                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4623                         }
4624                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4625                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4626                                             DOORBELL_OFFSET, ring->doorbell_index);
4627                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4628                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4629                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4630                         mqd->cp_hqd_pq_doorbell_control = tmp;
4631
4632                 } else {
4633                         mqd->cp_hqd_pq_doorbell_control = 0;
4634                 }
4635                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4636                        mqd->cp_hqd_pq_doorbell_control);
4637
4638                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4639                 ring->wptr = 0;
4640                 mqd->cp_hqd_pq_wptr = ring->wptr;
4641                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4642                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4643
4644                 /* set the vmid for the queue */
4645                 mqd->cp_hqd_vmid = 0;
4646                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4647
4648                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4649                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4650                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4651                 mqd->cp_hqd_persistent_state = tmp;
4652                 if (adev->asic_type == CHIP_STONEY ||
4653                         adev->asic_type == CHIP_POLARIS11 ||
4654                         adev->asic_type == CHIP_POLARIS10) {
4655                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4656                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4657                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4658                 }
4659
4660                 /* activate the queue */
4661                 mqd->cp_hqd_active = 1;
4662                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4663
4664                 vi_srbm_select(adev, 0, 0, 0, 0);
4665                 mutex_unlock(&adev->srbm_mutex);
4666
4667                 amdgpu_bo_kunmap(ring->mqd_obj);
4668                 amdgpu_bo_unreserve(ring->mqd_obj);
4669         }
4670
4671         if (use_doorbell) {
4672                 tmp = RREG32(mmCP_PQ_STATUS);
4673                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4674                 WREG32(mmCP_PQ_STATUS, tmp);
4675         }
4676
4677         gfx_v8_0_cp_compute_enable(adev, true);
4678
4679         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4680                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4681
4682                 ring->ready = true;
4683                 r = amdgpu_ring_test_ring(ring);
4684                 if (r)
4685                         ring->ready = false;
4686         }
4687
4688         return 0;
4689 }
4690
4691 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4692 {
4693         int r;
4694
4695         if (!(adev->flags & AMD_IS_APU))
4696                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4697
4698         if (!adev->pp_enabled) {
4699                 if (!adev->firmware.smu_load) {
4700                         /* legacy firmware loading */
4701                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4702                         if (r)
4703                                 return r;
4704
4705                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4706                         if (r)
4707                                 return r;
4708                 } else {
4709                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4710                                                         AMDGPU_UCODE_ID_CP_CE);
4711                         if (r)
4712                                 return -EINVAL;
4713
4714                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4715                                                         AMDGPU_UCODE_ID_CP_PFP);
4716                         if (r)
4717                                 return -EINVAL;
4718
4719                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4720                                                         AMDGPU_UCODE_ID_CP_ME);
4721                         if (r)
4722                                 return -EINVAL;
4723
4724                         if (adev->asic_type == CHIP_TOPAZ) {
4725                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4726                                 if (r)
4727                                         return r;
4728                         } else {
4729                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4730                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4731                                 if (r)
4732                                         return -EINVAL;
4733                         }
4734                 }
4735         }
4736
4737         r = gfx_v8_0_cp_gfx_resume(adev);
4738         if (r)
4739                 return r;
4740
4741         r = gfx_v8_0_cp_compute_resume(adev);
4742         if (r)
4743                 return r;
4744
4745         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4746
4747         return 0;
4748 }
4749
/*
 * Apply the same enable/disable request to both halves of the CP:
 * the gfx engine first, then the compute engine.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4755
/*
 * Hardware init entry point: program the golden registers, run the
 * general GPU init, then resume the RLC and finally the CP.
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Returns 0 on success or the first error reported by the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	return gfx_v8_0_cp_resume(adev);
}
4775
4776 static int gfx_v8_0_hw_fini(void *handle)
4777 {
4778         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4779
4780         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4781         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4782         gfx_v8_0_cp_enable(adev, false);
4783         gfx_v8_0_rlc_stop(adev);
4784         gfx_v8_0_cp_compute_fini(adev);
4785
4786         amdgpu_set_powergating_state(adev,
4787                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4788
4789         return 0;
4790 }
4791
/* Suspend is implemented as a plain hardware teardown. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4798
/* Resume is implemented as a full hardware init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4805
4806 static bool gfx_v8_0_is_idle(void *handle)
4807 {
4808         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4809
4810         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4811                 return false;
4812         else
4813                 return true;
4814 }
4815
4816 static int gfx_v8_0_wait_for_idle(void *handle)
4817 {
4818         unsigned i;
4819         u32 tmp;
4820         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4821
4822         for (i = 0; i < adev->usec_timeout; i++) {
4823                 /* read MC_STATUS */
4824                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4825
4826                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4827                         return 0;
4828                 udelay(1);
4829         }
4830         return -ETIMEDOUT;
4831 }
4832
4833 static int gfx_v8_0_soft_reset(void *handle)
4834 {
4835         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4836         u32 tmp;
4837         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4838
4839         /* GRBM_STATUS */
4840         tmp = RREG32(mmGRBM_STATUS);
4841         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4842                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4843                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4844                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4845                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4846                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4847                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4848                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4849                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4850                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4851         }
4852
4853         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4854                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4855                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4856                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4857                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4858         }
4859
4860         /* GRBM_STATUS2 */
4861         tmp = RREG32(mmGRBM_STATUS2);
4862         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4863                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4864                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4865
4866         /* SRBM_STATUS */
4867         tmp = RREG32(mmSRBM_STATUS);
4868         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4869                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4870                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4871
4872         if (grbm_soft_reset || srbm_soft_reset) {
4873                 /* stop the rlc */
4874                 gfx_v8_0_rlc_stop(adev);
4875
4876                 /* Disable GFX parsing/prefetching */
4877                 gfx_v8_0_cp_gfx_enable(adev, false);
4878
4879                 /* Disable MEC parsing/prefetching */
4880                 gfx_v8_0_cp_compute_enable(adev, false);
4881
4882                 if (grbm_soft_reset || srbm_soft_reset) {
4883                         tmp = RREG32(mmGMCON_DEBUG);
4884                         tmp = REG_SET_FIELD(tmp,
4885                                             GMCON_DEBUG, GFX_STALL, 1);
4886                         tmp = REG_SET_FIELD(tmp,
4887                                             GMCON_DEBUG, GFX_CLEAR, 1);
4888                         WREG32(mmGMCON_DEBUG, tmp);
4889
4890                         udelay(50);
4891                 }
4892
4893                 if (grbm_soft_reset) {
4894                         tmp = RREG32(mmGRBM_SOFT_RESET);
4895                         tmp |= grbm_soft_reset;
4896                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4897                         WREG32(mmGRBM_SOFT_RESET, tmp);
4898                         tmp = RREG32(mmGRBM_SOFT_RESET);
4899
4900                         udelay(50);
4901
4902                         tmp &= ~grbm_soft_reset;
4903                         WREG32(mmGRBM_SOFT_RESET, tmp);
4904                         tmp = RREG32(mmGRBM_SOFT_RESET);
4905                 }
4906
4907                 if (srbm_soft_reset) {
4908                         tmp = RREG32(mmSRBM_SOFT_RESET);
4909                         tmp |= srbm_soft_reset;
4910                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4911                         WREG32(mmSRBM_SOFT_RESET, tmp);
4912                         tmp = RREG32(mmSRBM_SOFT_RESET);
4913
4914                         udelay(50);
4915
4916                         tmp &= ~srbm_soft_reset;
4917                         WREG32(mmSRBM_SOFT_RESET, tmp);
4918                         tmp = RREG32(mmSRBM_SOFT_RESET);
4919                 }
4920
4921                 if (grbm_soft_reset || srbm_soft_reset) {
4922                         tmp = RREG32(mmGMCON_DEBUG);
4923                         tmp = REG_SET_FIELD(tmp,
4924                                             GMCON_DEBUG, GFX_STALL, 0);
4925                         tmp = REG_SET_FIELD(tmp,
4926                                             GMCON_DEBUG, GFX_CLEAR, 0);
4927                         WREG32(mmGMCON_DEBUG, tmp);
4928                 }
4929
4930                 /* Wait a little for things to settle down */
4931                 udelay(50);
4932         }
4933         return 0;
4934 }
4935
4936 /**
4937  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4938  *
4939  * @adev: amdgpu_device pointer
4940  *
4941  * Fetches a GPU clock counter snapshot.
4942  * Returns the 64 bit clock counter snapshot.
4943  */
4944 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4945 {
4946         uint64_t clock;
4947
4948         mutex_lock(&adev->gfx.gpu_clock_mutex);
4949         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4950         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4951                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4952         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4953         return clock;
4954 }
4955
4956 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4957                                           uint32_t vmid,
4958                                           uint32_t gds_base, uint32_t gds_size,
4959                                           uint32_t gws_base, uint32_t gws_size,
4960                                           uint32_t oa_base, uint32_t oa_size)
4961 {
4962         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4963         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4964
4965         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4966         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4967
4968         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4969         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4970
4971         /* GDS Base */
4972         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4973         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4974                                 WRITE_DATA_DST_SEL(0)));
4975         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4976         amdgpu_ring_write(ring, 0);
4977         amdgpu_ring_write(ring, gds_base);
4978
4979         /* GDS Size */
4980         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4981         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4982                                 WRITE_DATA_DST_SEL(0)));
4983         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4984         amdgpu_ring_write(ring, 0);
4985         amdgpu_ring_write(ring, gds_size);
4986
4987         /* GWS */
4988         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4989         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4990                                 WRITE_DATA_DST_SEL(0)));
4991         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4992         amdgpu_ring_write(ring, 0);
4993         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4994
4995         /* OA */
4996         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4997         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4998                                 WRITE_DATA_DST_SEL(0)));
4999         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5000         amdgpu_ring_write(ring, 0);
5001         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5002 }
5003
5004 static int gfx_v8_0_early_init(void *handle)
5005 {
5006         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5007
5008         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5009         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5010         gfx_v8_0_set_ring_funcs(adev);
5011         gfx_v8_0_set_irq_funcs(adev);
5012         gfx_v8_0_set_gds_init(adev);
5013         gfx_v8_0_set_rlc_funcs(adev);
5014
5015         return 0;
5016 }
5017
5018 static int gfx_v8_0_late_init(void *handle)
5019 {
5020         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5021         int r;
5022
5023         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5024         if (r)
5025                 return r;
5026
5027         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5028         if (r)
5029                 return r;
5030
5031         /* requires IBs so do in late init after IB pool is initialized */
5032         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5033         if (r)
5034                 return r;
5035
5036         amdgpu_set_powergating_state(adev,
5037                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5038
5039         return 0;
5040 }
5041
5042 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5043                 bool enable)
5044 {
5045         uint32_t data, temp;
5046
5047         /* Send msg to SMU via Powerplay */
5048         amdgpu_set_powergating_state(adev,
5049                         AMD_IP_BLOCK_TYPE_SMC,
5050                         enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5051
5052         if (enable) {
5053                 /* Enable static MGPG */
5054                 temp = data = RREG32(mmRLC_PG_CNTL);
5055                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5056
5057                 if (temp != data)
5058                         WREG32(mmRLC_PG_CNTL, data);
5059         } else {
5060                 temp = data = RREG32(mmRLC_PG_CNTL);
5061                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5062
5063                 if (temp != data)
5064                         WREG32(mmRLC_PG_CNTL, data);
5065         }
5066 }
5067
5068 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5069                 bool enable)
5070 {
5071         uint32_t data, temp;
5072
5073         if (enable) {
5074                 /* Enable dynamic MGPG */
5075                 temp = data = RREG32(mmRLC_PG_CNTL);
5076                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5077
5078                 if (temp != data)
5079                         WREG32(mmRLC_PG_CNTL, data);
5080         } else {
5081                 temp = data = RREG32(mmRLC_PG_CNTL);
5082                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5083
5084                 if (temp != data)
5085                         WREG32(mmRLC_PG_CNTL, data);
5086         }
5087 }
5088
5089 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5090                 bool enable)
5091 {
5092         uint32_t data, temp;
5093
5094         if (enable) {
5095                 /* Enable quick PG */
5096                 temp = data = RREG32(mmRLC_PG_CNTL);
5097                 data |= 0x100000;
5098
5099                 if (temp != data)
5100                         WREG32(mmRLC_PG_CNTL, data);
5101         } else {
5102                 temp = data = RREG32(mmRLC_PG_CNTL);
5103                 data &= ~0x100000;
5104
5105                 if (temp != data)
5106                         WREG32(mmRLC_PG_CNTL, data);
5107         }
5108 }
5109
5110 static int gfx_v8_0_set_powergating_state(void *handle,
5111                                           enum amd_powergating_state state)
5112 {
5113         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5114
5115         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5116                 return 0;
5117
5118         switch (adev->asic_type) {
5119         case CHIP_POLARIS11:
5120                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5121                         polaris11_enable_gfx_static_mg_power_gating(adev,
5122                                         state == AMD_PG_STATE_GATE ? true : false);
5123                 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5124                         polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5125                                         state == AMD_PG_STATE_GATE ? true : false);
5126                 else
5127                         polaris11_enable_gfx_quick_mg_power_gating(adev,
5128                                         state == AMD_PG_STATE_GATE ? true : false);
5129                 break;
5130         default:
5131                 break;
5132         }
5133
5134         return 0;
5135 }
5136
5137 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5138                                      uint32_t reg_addr, uint32_t cmd)
5139 {
5140         uint32_t data;
5141
5142         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5143
5144         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5145         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5146
5147         data = RREG32(mmRLC_SERDES_WR_CTRL);
5148         if (adev->asic_type == CHIP_STONEY)
5149                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5150                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5151                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5152                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5153                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5154                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5155                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5156                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5157                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5158         else
5159                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5160                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5161                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5162                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5163                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5164                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5165                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5166                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5167                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5168                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5169                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5170         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5171                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5172                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5173                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5174
5175         WREG32(mmRLC_SERDES_WR_CTRL, data);
5176 }
5177
/* Message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout: bit 0 is the request bit, bits 4:1 hold the message */
#define RLC_GPR_REG2__REQ_MASK		0x1
#define RLC_GPR_REG2__MESSAGE__SHIFT	1
#define RLC_GPR_REG2__MESSAGE_MASK	0x1e
5184
5185 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5186 {
5187         u32 data = 0;
5188         unsigned i;
5189
5190         data = RREG32(mmRLC_CNTL);
5191         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5192                 return;
5193
5194         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5195             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5196                                AMD_PG_SUPPORT_GFX_DMG))) {
5197                 data |= RLC_GPR_REG2__REQ_MASK;
5198                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5199                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5200                 WREG32(mmRLC_GPR_REG2, data);
5201
5202                 for (i = 0; i < adev->usec_timeout; i++) {
5203                         if ((RREG32(mmRLC_GPM_STAT) &
5204                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5205                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5206                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5207                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5208                                 break;
5209                         udelay(1);
5210                 }
5211
5212                 for (i = 0; i < adev->usec_timeout; i++) {
5213                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5214                                 break;
5215                         udelay(1);
5216                 }
5217                 adev->gfx.rlc.in_safe_mode = true;
5218         }
5219 }
5220
5221 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5222 {
5223         u32 data;
5224         unsigned i;
5225
5226         data = RREG32(mmRLC_CNTL);
5227         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5228                 return;
5229
5230         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5231             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5232                                AMD_PG_SUPPORT_GFX_DMG))) {
5233                 data |= RLC_GPR_REG2__REQ_MASK;
5234                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5235                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5236                 WREG32(mmRLC_GPR_REG2, data);
5237                 adev->gfx.rlc.in_safe_mode = false;
5238         }
5239
5240         for (i = 0; i < adev->usec_timeout; i++) {
5241                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5242                         break;
5243                 udelay(1);
5244         }
5245 }
5246
5247 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5248 {
5249         u32 data;
5250         unsigned i;
5251
5252         data = RREG32(mmRLC_CNTL);
5253         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5254                 return;
5255
5256         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5257                 data |= RLC_SAFE_MODE__CMD_MASK;
5258                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5259                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5260                 WREG32(mmRLC_SAFE_MODE, data);
5261
5262                 for (i = 0; i < adev->usec_timeout; i++) {
5263                         if ((RREG32(mmRLC_GPM_STAT) &
5264                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5265                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5266                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5267                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5268                                 break;
5269                         udelay(1);
5270                 }
5271
5272                 for (i = 0; i < adev->usec_timeout; i++) {
5273                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5274                                 break;
5275                         udelay(1);
5276                 }
5277                 adev->gfx.rlc.in_safe_mode = true;
5278         }
5279 }
5280
5281 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5282 {
5283         u32 data = 0;
5284         unsigned i;
5285
5286         data = RREG32(mmRLC_CNTL);
5287         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5288                 return;
5289
5290         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5291                 if (adev->gfx.rlc.in_safe_mode) {
5292                         data |= RLC_SAFE_MODE__CMD_MASK;
5293                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5294                         WREG32(mmRLC_SAFE_MODE, data);
5295                         adev->gfx.rlc.in_safe_mode = false;
5296                 }
5297         }
5298
5299         for (i = 0; i < adev->usec_timeout; i++) {
5300                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5301                         break;
5302                 udelay(1);
5303         }
5304 }
5305
5306 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5307 {
5308         adev->gfx.rlc.in_safe_mode = true;
5309 }
5310
5311 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5312 {
5313         adev->gfx.rlc.in_safe_mode = false;
5314 }
5315
5316 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5317         .enter_safe_mode = cz_enter_rlc_safe_mode,
5318         .exit_safe_mode = cz_exit_rlc_safe_mode
5319 };
5320
5321 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5322         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5323         .exit_safe_mode = iceland_exit_rlc_safe_mode
5324 };
5325
5326 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5327         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5328         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5329 };
5330
5331 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5332                                                       bool enable)
5333 {
5334         uint32_t temp, data;
5335
5336         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5337
5338         /* It is disabled by HW by default */
5339         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5340                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5341                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5342                                 /* 1 - RLC memory Light sleep */
5343                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5344                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5345                                 if (temp != data)
5346                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5347                         }
5348
5349                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5350                                 /* 2 - CP memory Light sleep */
5351                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5352                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5353                                 if (temp != data)
5354                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5355                         }
5356                 }
5357
5358                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5359                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5360                 if (adev->flags & AMD_IS_APU)
5361                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5362                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5363                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5364                 else
5365                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5366                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5367                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5368                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5369
5370                 if (temp != data)
5371                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5372
5373                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5374                 gfx_v8_0_wait_for_rlc_serdes(adev);
5375
5376                 /* 5 - clear mgcg override */
5377                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5378
5379                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5380                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5381                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5382                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5383                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5384                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5385                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5386                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5387                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5388                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5389                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5390                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5391                         if (temp != data)
5392                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5393                 }
5394                 udelay(50);
5395
5396                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5397                 gfx_v8_0_wait_for_rlc_serdes(adev);
5398         } else {
5399                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5400                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5401                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5402                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5403                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5404                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5405                 if (temp != data)
5406                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5407
5408                 /* 2 - disable MGLS in RLC */
5409                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5410                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5411                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5412                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5413                 }
5414
5415                 /* 3 - disable MGLS in CP */
5416                 data = RREG32(mmCP_MEM_SLP_CNTL);
5417                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5418                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5419                         WREG32(mmCP_MEM_SLP_CNTL, data);
5420                 }
5421
5422                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5423                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5424                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5425                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5426                 if (temp != data)
5427                         WREG32(mmCGTS_SM_CTRL_REG, data);
5428
5429                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5430                 gfx_v8_0_wait_for_rlc_serdes(adev);
5431
5432                 /* 6 - set mgcg override */
5433                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5434
5435                 udelay(50);
5436
5437                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5438                 gfx_v8_0_wait_for_rlc_serdes(adev);
5439         }
5440
5441         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5442 }
5443
5444 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5445                                                       bool enable)
5446 {
5447         uint32_t temp, temp1, data, data1;
5448
5449         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5450
5451         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5452
5453         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5454                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5455                  * Cmp_busy/GFX_Idle interrupts
5456                  */
5457                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5458
5459                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5460                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5461                 if (temp1 != data1)
5462                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5463
5464                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5465                 gfx_v8_0_wait_for_rlc_serdes(adev);
5466
5467                 /* 3 - clear cgcg override */
5468                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5469
5470                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5471                 gfx_v8_0_wait_for_rlc_serdes(adev);
5472
5473                 /* 4 - write cmd to set CGLS */
5474                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5475
5476                 /* 5 - enable cgcg */
5477                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5478
5479                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5480                         /* enable cgls*/
5481                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5482
5483                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5484                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5485
5486                         if (temp1 != data1)
5487                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5488                 } else {
5489                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5490                 }
5491
5492                 if (temp != data)
5493                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5494         } else {
5495                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5496                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5497
5498                 /* TEST CGCG */
5499                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5500                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5501                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5502                 if (temp1 != data1)
5503                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5504
5505                 /* read gfx register to wake up cgcg */
5506                 RREG32(mmCB_CGTT_SCLK_CTRL);
5507                 RREG32(mmCB_CGTT_SCLK_CTRL);
5508                 RREG32(mmCB_CGTT_SCLK_CTRL);
5509                 RREG32(mmCB_CGTT_SCLK_CTRL);
5510
5511                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5512                 gfx_v8_0_wait_for_rlc_serdes(adev);
5513
5514                 /* write cmd to Set CGCG Overrride */
5515                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5516
5517                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5518                 gfx_v8_0_wait_for_rlc_serdes(adev);
5519
5520                 /* write cmd to Clear CGLS */
5521                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5522
5523                 /* disable cgcg, cgls should be disabled too. */
5524                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5525                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5526                 if (temp != data)
5527                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5528         }
5529
5530         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5531 }
5532 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5533                                             bool enable)
5534 {
5535         if (enable) {
5536                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5537                  * ===  MGCG + MGLS + TS(CG/LS) ===
5538                  */
5539                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5540                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5541         } else {
5542                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5543                  * ===  CGCG + CGLS ===
5544                  */
5545                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5546                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5547         }
5548         return 0;
5549 }
5550
5551 static int gfx_v8_0_set_clockgating_state(void *handle,
5552                                           enum amd_clockgating_state state)
5553 {
5554         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5555
5556         switch (adev->asic_type) {
5557         case CHIP_FIJI:
5558         case CHIP_CARRIZO:
5559         case CHIP_STONEY:
5560                 gfx_v8_0_update_gfx_clock_gating(adev,
5561                                                  state == AMD_CG_STATE_GATE ? true : false);
5562                 break;
5563         default:
5564                 break;
5565         }
5566         return 0;
5567 }
5568
5569 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5570 {
5571         u32 rptr;
5572
5573         rptr = ring->adev->wb.wb[ring->rptr_offs];
5574
5575         return rptr;
5576 }
5577
5578 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5579 {
5580         struct amdgpu_device *adev = ring->adev;
5581         u32 wptr;
5582
5583         if (ring->use_doorbell)
5584                 /* XXX check if swapping is necessary on BE */
5585                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5586         else
5587                 wptr = RREG32(mmCP_RB0_WPTR);
5588
5589         return wptr;
5590 }
5591
5592 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5593 {
5594         struct amdgpu_device *adev = ring->adev;
5595
5596         if (ring->use_doorbell) {
5597                 /* XXX check if swapping is necessary on BE */
5598                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5599                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5600         } else {
5601                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5602                 (void)RREG32(mmCP_RB0_WPTR);
5603         }
5604 }
5605
5606 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5607 {
5608         u32 ref_and_mask, reg_mem_engine;
5609
5610         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5611                 switch (ring->me) {
5612                 case 1:
5613                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5614                         break;
5615                 case 2:
5616                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5617                         break;
5618                 default:
5619                         return;
5620                 }
5621                 reg_mem_engine = 0;
5622         } else {
5623                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5624                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5625         }
5626
5627         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5628         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5629                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5630                                  reg_mem_engine));
5631         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5632         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5633         amdgpu_ring_write(ring, ref_and_mask);
5634         amdgpu_ring_write(ring, ref_and_mask);
5635         amdgpu_ring_write(ring, 0x20); /* poll interval */
5636 }
5637
5638 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5639 {
5640         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5641         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5642                                  WRITE_DATA_DST_SEL(0) |
5643                                  WR_CONFIRM));
5644         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5645         amdgpu_ring_write(ring, 0);
5646         amdgpu_ring_write(ring, 1);
5647
5648 }
5649
5650 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5651                                       struct amdgpu_ib *ib,
5652                                       unsigned vm_id, bool ctx_switch)
5653 {
5654         u32 header, control = 0;
5655         u32 next_rptr = ring->wptr + 5;
5656
5657         if (ctx_switch)
5658                 next_rptr += 2;
5659
5660         next_rptr += 4;
5661         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5662         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5663         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5664         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5665         amdgpu_ring_write(ring, next_rptr);
5666
5667         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5668         if (ctx_switch) {
5669                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5670                 amdgpu_ring_write(ring, 0);
5671         }
5672
5673         if (ib->flags & AMDGPU_IB_FLAG_CE)
5674                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5675         else
5676                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5677
5678         control |= ib->length_dw | (vm_id << 24);
5679
5680         amdgpu_ring_write(ring, header);
5681         amdgpu_ring_write(ring,
5682 #ifdef __BIG_ENDIAN
5683                           (2 << 0) |
5684 #endif
5685                           (ib->gpu_addr & 0xFFFFFFFC));
5686         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5687         amdgpu_ring_write(ring, control);
5688 }
5689
5690 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5691                                           struct amdgpu_ib *ib,
5692                                           unsigned vm_id, bool ctx_switch)
5693 {
5694         u32 header, control = 0;
5695         u32 next_rptr = ring->wptr + 5;
5696
5697         control |= INDIRECT_BUFFER_VALID;
5698
5699         next_rptr += 4;
5700         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5701         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5702         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5703         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5704         amdgpu_ring_write(ring, next_rptr);
5705
5706         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5707
5708         control |= ib->length_dw | (vm_id << 24);
5709
5710         amdgpu_ring_write(ring, header);
5711         amdgpu_ring_write(ring,
5712 #ifdef __BIG_ENDIAN
5713                                           (2 << 0) |
5714 #endif
5715                                           (ib->gpu_addr & 0xFFFFFFFC));
5716         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5717         amdgpu_ring_write(ring, control);
5718 }
5719
5720 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5721                                          u64 seq, unsigned flags)
5722 {
5723         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5724         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5725
5726         /* EVENT_WRITE_EOP - flush caches, send int */
5727         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5728         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5729                                  EOP_TC_ACTION_EN |
5730                                  EOP_TC_WB_ACTION_EN |
5731                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5732                                  EVENT_INDEX(5)));
5733         amdgpu_ring_write(ring, addr & 0xfffffffc);
5734         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5735                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5736         amdgpu_ring_write(ring, lower_32_bits(seq));
5737         amdgpu_ring_write(ring, upper_32_bits(seq));
5738
5739 }
5740
5741 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5742 {
5743         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5744         uint32_t seq = ring->fence_drv.sync_seq;
5745         uint64_t addr = ring->fence_drv.gpu_addr;
5746
5747         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5748         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5749                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5750                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5751         amdgpu_ring_write(ring, addr & 0xfffffffc);
5752         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5753         amdgpu_ring_write(ring, seq);
5754         amdgpu_ring_write(ring, 0xffffffff);
5755         amdgpu_ring_write(ring, 4); /* poll interval */
5756
5757         if (usepfp) {
5758                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5759                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5760                 amdgpu_ring_write(ring, 0);
5761                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5762                 amdgpu_ring_write(ring, 0);
5763         }
5764 }
5765
5766 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5767                                         unsigned vm_id, uint64_t pd_addr)
5768 {
5769         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5770
5771         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5772         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5773                                  WRITE_DATA_DST_SEL(0)) |
5774                                  WR_CONFIRM);
5775         if (vm_id < 8) {
5776                 amdgpu_ring_write(ring,
5777                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5778         } else {
5779                 amdgpu_ring_write(ring,
5780                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5781         }
5782         amdgpu_ring_write(ring, 0);
5783         amdgpu_ring_write(ring, pd_addr >> 12);
5784
5785         /* bits 0-15 are the VM contexts0-15 */
5786         /* invalidate the cache */
5787         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5788         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5789                                  WRITE_DATA_DST_SEL(0)));
5790         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5791         amdgpu_ring_write(ring, 0);
5792         amdgpu_ring_write(ring, 1 << vm_id);
5793
5794         /* wait for the invalidate to complete */
5795         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5796         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5797                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5798                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5799         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5800         amdgpu_ring_write(ring, 0);
5801         amdgpu_ring_write(ring, 0); /* ref */
5802         amdgpu_ring_write(ring, 0); /* mask */
5803         amdgpu_ring_write(ring, 0x20); /* poll interval */
5804
5805         /* compute doesn't have PFP */
5806         if (usepfp) {
5807                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5808                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5809                 amdgpu_ring_write(ring, 0x0);
5810                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5811                 amdgpu_ring_write(ring, 0);
5812                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5813                 amdgpu_ring_write(ring, 0);
5814         }
5815 }
5816
5817 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5818 {
5819         return ring->adev->wb.wb[ring->rptr_offs];
5820 }
5821
5822 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5823 {
5824         return ring->adev->wb.wb[ring->wptr_offs];
5825 }
5826
5827 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5828 {
5829         struct amdgpu_device *adev = ring->adev;
5830
5831         /* XXX check if swapping is necessary on BE */
5832         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5833         WDOORBELL32(ring->doorbell_index, ring->wptr);
5834 }
5835
5836 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5837                                              u64 addr, u64 seq,
5838                                              unsigned flags)
5839 {
5840         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5841         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5842
5843         /* RELEASE_MEM - flush caches, send int */
5844         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5845         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5846                                  EOP_TC_ACTION_EN |
5847                                  EOP_TC_WB_ACTION_EN |
5848                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5849                                  EVENT_INDEX(5)));
5850         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5851         amdgpu_ring_write(ring, addr & 0xfffffffc);
5852         amdgpu_ring_write(ring, upper_32_bits(addr));
5853         amdgpu_ring_write(ring, lower_32_bits(seq));
5854         amdgpu_ring_write(ring, upper_32_bits(seq));
5855 }
5856
5857 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5858                                                  enum amdgpu_interrupt_state state)
5859 {
5860         u32 cp_int_cntl;
5861
5862         switch (state) {
5863         case AMDGPU_IRQ_STATE_DISABLE:
5864                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5865                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5866                                             TIME_STAMP_INT_ENABLE, 0);
5867                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5868                 break;
5869         case AMDGPU_IRQ_STATE_ENABLE:
5870                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5871                 cp_int_cntl =
5872                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5873                                       TIME_STAMP_INT_ENABLE, 1);
5874                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5875                 break;
5876         default:
5877                 break;
5878         }
5879 }
5880
5881 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5882                                                      int me, int pipe,
5883                                                      enum amdgpu_interrupt_state state)
5884 {
5885         u32 mec_int_cntl, mec_int_cntl_reg;
5886
5887         /*
5888          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5889          * handles the setting of interrupts for this specific pipe. All other
5890          * pipes' interrupts are set by amdkfd.
5891          */
5892
5893         if (me == 1) {
5894                 switch (pipe) {
5895                 case 0:
5896                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5897                         break;
5898                 default:
5899                         DRM_DEBUG("invalid pipe %d\n", pipe);
5900                         return;
5901                 }
5902         } else {
5903                 DRM_DEBUG("invalid me %d\n", me);
5904                 return;
5905         }
5906
5907         switch (state) {
5908         case AMDGPU_IRQ_STATE_DISABLE:
5909                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5910                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5911                                              TIME_STAMP_INT_ENABLE, 0);
5912                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5913                 break;
5914         case AMDGPU_IRQ_STATE_ENABLE:
5915                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5916                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5917                                              TIME_STAMP_INT_ENABLE, 1);
5918                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5919                 break;
5920         default:
5921                 break;
5922         }
5923 }
5924
5925 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5926                                              struct amdgpu_irq_src *source,
5927                                              unsigned type,
5928                                              enum amdgpu_interrupt_state state)
5929 {
5930         u32 cp_int_cntl;
5931
5932         switch (state) {
5933         case AMDGPU_IRQ_STATE_DISABLE:
5934                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5935                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5936                                             PRIV_REG_INT_ENABLE, 0);
5937                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5938                 break;
5939         case AMDGPU_IRQ_STATE_ENABLE:
5940                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5941                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5942                                             PRIV_REG_INT_ENABLE, 1);
5943                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5944                 break;
5945         default:
5946                 break;
5947         }
5948
5949         return 0;
5950 }
5951
5952 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5953                                               struct amdgpu_irq_src *source,
5954                                               unsigned type,
5955                                               enum amdgpu_interrupt_state state)
5956 {
5957         u32 cp_int_cntl;
5958
5959         switch (state) {
5960         case AMDGPU_IRQ_STATE_DISABLE:
5961                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5962                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5963                                             PRIV_INSTR_INT_ENABLE, 0);
5964                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5965                 break;
5966         case AMDGPU_IRQ_STATE_ENABLE:
5967                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5968                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5969                                             PRIV_INSTR_INT_ENABLE, 1);
5970                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5971                 break;
5972         default:
5973                 break;
5974         }
5975
5976         return 0;
5977 }
5978
5979 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5980                                             struct amdgpu_irq_src *src,
5981                                             unsigned type,
5982                                             enum amdgpu_interrupt_state state)
5983 {
5984         switch (type) {
5985         case AMDGPU_CP_IRQ_GFX_EOP:
5986                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5987                 break;
5988         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5989                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5990                 break;
5991         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5992                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5993                 break;
5994         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5995                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5996                 break;
5997         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5998                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5999                 break;
6000         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6001                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6002                 break;
6003         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6004                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6005                 break;
6006         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6007                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6008                 break;
6009         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6010                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6011                 break;
6012         default:
6013                 break;
6014         }
6015         return 0;
6016 }
6017
6018 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6019                             struct amdgpu_irq_src *source,
6020                             struct amdgpu_iv_entry *entry)
6021 {
6022         int i;
6023         u8 me_id, pipe_id, queue_id;
6024         struct amdgpu_ring *ring;
6025
6026         DRM_DEBUG("IH: CP EOP\n");
6027         me_id = (entry->ring_id & 0x0c) >> 2;
6028         pipe_id = (entry->ring_id & 0x03) >> 0;
6029         queue_id = (entry->ring_id & 0x70) >> 4;
6030
6031         switch (me_id) {
6032         case 0:
6033                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6034                 break;
6035         case 1:
6036         case 2:
6037                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6038                         ring = &adev->gfx.compute_ring[i];
6039                         /* Per-queue interrupt is supported for MEC starting from VI.
6040                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6041                           */
6042                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6043                                 amdgpu_fence_process(ring);
6044                 }
6045                 break;
6046         }
6047         return 0;
6048 }
6049
6050 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6051                                  struct amdgpu_irq_src *source,
6052                                  struct amdgpu_iv_entry *entry)
6053 {
6054         DRM_ERROR("Illegal register access in command stream\n");
6055         schedule_work(&adev->reset_work);
6056         return 0;
6057 }
6058
6059 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6060                                   struct amdgpu_irq_src *source,
6061                                   struct amdgpu_iv_entry *entry)
6062 {
6063         DRM_ERROR("Illegal instruction in command stream\n");
6064         schedule_work(&adev->reset_work);
6065         return 0;
6066 }
6067
6068 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6069         .name = "gfx_v8_0",
6070         .early_init = gfx_v8_0_early_init,
6071         .late_init = gfx_v8_0_late_init,
6072         .sw_init = gfx_v8_0_sw_init,
6073         .sw_fini = gfx_v8_0_sw_fini,
6074         .hw_init = gfx_v8_0_hw_init,
6075         .hw_fini = gfx_v8_0_hw_fini,
6076         .suspend = gfx_v8_0_suspend,
6077         .resume = gfx_v8_0_resume,
6078         .is_idle = gfx_v8_0_is_idle,
6079         .wait_for_idle = gfx_v8_0_wait_for_idle,
6080         .soft_reset = gfx_v8_0_soft_reset,
6081         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6082         .set_powergating_state = gfx_v8_0_set_powergating_state,
6083 };
6084
6085 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6086         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6087         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6088         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6089         .parse_cs = NULL,
6090         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6091         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6092         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6093         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6094         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6095         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6096         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6097         .test_ring = gfx_v8_0_ring_test_ring,
6098         .test_ib = gfx_v8_0_ring_test_ib,
6099         .insert_nop = amdgpu_ring_insert_nop,
6100         .pad_ib = amdgpu_ring_generic_pad_ib,
6101 };
6102
6103 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6104         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6105         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6106         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6107         .parse_cs = NULL,
6108         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6109         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6110         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6111         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6112         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6113         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6114         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6115         .test_ring = gfx_v8_0_ring_test_ring,
6116         .test_ib = gfx_v8_0_ring_test_ib,
6117         .insert_nop = amdgpu_ring_insert_nop,
6118         .pad_ib = amdgpu_ring_generic_pad_ib,
6119 };
6120
6121 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6122 {
6123         int i;
6124
6125         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6126                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6127
6128         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6129                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6130 }
6131
6132 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6133         .set = gfx_v8_0_set_eop_interrupt_state,
6134         .process = gfx_v8_0_eop_irq,
6135 };
6136
6137 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6138         .set = gfx_v8_0_set_priv_reg_fault_state,
6139         .process = gfx_v8_0_priv_reg_irq,
6140 };
6141
6142 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6143         .set = gfx_v8_0_set_priv_inst_fault_state,
6144         .process = gfx_v8_0_priv_inst_irq,
6145 };
6146
6147 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6148 {
6149         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6150         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6151
6152         adev->gfx.priv_reg_irq.num_types = 1;
6153         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6154
6155         adev->gfx.priv_inst_irq.num_types = 1;
6156         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6157 }
6158
6159 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6160 {
6161         switch (adev->asic_type) {
6162         case CHIP_TOPAZ:
6163         case CHIP_STONEY:
6164                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6165                 break;
6166         case CHIP_CARRIZO:
6167                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6168                 break;
6169         default:
6170                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6171                 break;
6172         }
6173 }
6174
6175 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6176 {
6177         /* init asci gds info */
6178         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6179         adev->gds.gws.total_size = 64;
6180         adev->gds.oa.total_size = 16;
6181
6182         if (adev->gds.mem.total_size == 64 * 1024) {
6183                 adev->gds.mem.gfx_partition_size = 4096;
6184                 adev->gds.mem.cs_partition_size = 4096;
6185
6186                 adev->gds.gws.gfx_partition_size = 4;
6187                 adev->gds.gws.cs_partition_size = 4;
6188
6189                 adev->gds.oa.gfx_partition_size = 4;
6190                 adev->gds.oa.cs_partition_size = 1;
6191         } else {
6192                 adev->gds.mem.gfx_partition_size = 1024;
6193                 adev->gds.mem.cs_partition_size = 1024;
6194
6195                 adev->gds.gws.gfx_partition_size = 16;
6196                 adev->gds.gws.cs_partition_size = 16;
6197
6198                 adev->gds.oa.gfx_partition_size = 4;
6199                 adev->gds.oa.cs_partition_size = 4;
6200         }
6201 }
6202
6203 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6204 {
6205         u32 data, mask;
6206
6207         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6208         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6209
6210         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6211         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6212
6213         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6214
6215         return (~data) & mask;
6216 }
6217
6218 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6219 {
6220         int i, j, k, counter, active_cu_number = 0;
6221         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6222         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6223
6224         memset(cu_info, 0, sizeof(*cu_info));
6225
6226         mutex_lock(&adev->grbm_idx_mutex);
6227         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6228                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6229                         mask = 1;
6230                         ao_bitmap = 0;
6231                         counter = 0;
6232                         gfx_v8_0_select_se_sh(adev, i, j);
6233                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6234                         cu_info->bitmap[i][j] = bitmap;
6235
6236                         for (k = 0; k < 16; k ++) {
6237                                 if (bitmap & mask) {
6238                                         if (counter < 2)
6239                                                 ao_bitmap |= mask;
6240                                         counter ++;
6241                                 }
6242                                 mask <<= 1;
6243                         }
6244                         active_cu_number += counter;
6245                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6246                 }
6247         }
6248         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6249         mutex_unlock(&adev->grbm_idx_mutex);
6250
6251         cu_info->number = active_cu_number;
6252         cu_info->ao_cu_mask = ao_cu_mask;
6253 }