1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
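/*
 * GFX8 exposes one GFX ring and eight compute (MEC) rings to the driver.
 * The *_GB_ADDR_CONFIG_GOLDEN values are the per-ASIC GB_ADDR_CONFIG
 * settings applied as part of the golden register setup below.
 */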
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
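/*
 * Helpers for building GB_TILE_MODEn/GB_MACROTILE_MODEn values: each macro
 * shifts a field value into the position defined by the gfx_8_0 shift/mask
 * headers.
 */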
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address */
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX
90 };
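/*
 * The BPM registers above are reached through the RLC SERDES bus;
 * SET_BPM_SERDES_CMD/CLE_BPM_SERDES_CMD select whether the addressed bit
 * is set or cleared when the serdes write helper issues the command.
 */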
91
92 #define RLC_FormatDirectRegListLength        14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
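/*
 * Per-VMID GDS register offsets: base/size of the GDS partition plus the
 * GWS and OA registers, one entry for each of the 16 VMIDs.
 */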
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
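/*
 * The golden register tables below are triplets of
 * { register offset, AND mask, OR value } as consumed by
 * amdgpu_program_register_sequence(): the register is read, the AND mask
 * bits are cleared, the OR value is set and the result written back
 * (a full 0xffffffff mask turns the entry into a straight write).
 */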
161 static const u32 golden_settings_tonga_a11[] =
162 {
163         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166         mmGB_GPU_ID, 0x0000000f, 0x00000000,
167         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
180 static const u32 tonga_golden_common_all[] =
181 {
182         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283         mmSQ_CONFIG, 0x07f80000, 0x01180000,
284         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
290 };
291
292 static const u32 polaris11_golden_common_all[] =
293 {
294         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 };
301
302 static const u32 golden_settings_polaris10_a11[] =
303 {
304         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315         mmSQ_CONFIG, 0x07f80000, 0x07180000,
316         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322
323 static const u32 polaris10_golden_common_all[] =
324 {
325         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
333 };
334
335 static const u32 fiji_golden_common_all[] =
336 {
337         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
347 };
348
349 static const u32 golden_settings_fiji_a10[] =
350 {
351         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
362 };
363
364 static const u32 fiji_mgcg_cgcg_init[] =
365 {
366         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
401 };
402
403 static const u32 golden_settings_iceland_a11[] =
404 {
405         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408         mmGB_GPU_ID, 0x0000000f, 0x00000000,
409         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
421 };
422
423 static const u32 iceland_golden_common_all[] =
424 {
425         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
433 };
434
435 static const u32 iceland_mgcg_cgcg_init[] =
436 {
437         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
501 };
502
503 static const u32 cz_golden_settings_a11[] =
504 {
505         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507         mmGB_GPU_ID, 0x0000000f, 0x00000000,
508         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
517 };
518
519 static const u32 cz_golden_common_all[] =
520 {
521         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
529 };
530
531 static const u32 cz_mgcg_cgcg_init[] =
532 {
533         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
608 };
609
610 static const u32 stoney_golden_settings_a11[] =
611 {
612         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613         mmGB_GPU_ID, 0x0000000f, 0x00000000,
614         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
622 };
623
624 static const u32 stoney_golden_common_all[] =
625 {
626         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
628         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
629         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
630         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
631         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
632         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
633         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
634 };
635
636 static const u32 stoney_mgcg_cgcg_init[] =
637 {
638         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
639         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
640         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
642         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
643         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
644 };
645
646 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
650 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
651 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652
653 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
654 {
655         switch (adev->asic_type) {
656         case CHIP_TOPAZ:
657                 amdgpu_program_register_sequence(adev,
658                                                  iceland_mgcg_cgcg_init,
659                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
660                 amdgpu_program_register_sequence(adev,
661                                                  golden_settings_iceland_a11,
662                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
663                 amdgpu_program_register_sequence(adev,
664                                                  iceland_golden_common_all,
665                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
666                 break;
667         case CHIP_FIJI:
668                 amdgpu_program_register_sequence(adev,
669                                                  fiji_mgcg_cgcg_init,
670                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
671                 amdgpu_program_register_sequence(adev,
672                                                  golden_settings_fiji_a10,
673                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
674                 amdgpu_program_register_sequence(adev,
675                                                  fiji_golden_common_all,
676                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
677                 break;
678
679         case CHIP_TONGA:
680                 amdgpu_program_register_sequence(adev,
681                                                  tonga_mgcg_cgcg_init,
682                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
683                 amdgpu_program_register_sequence(adev,
684                                                  golden_settings_tonga_a11,
685                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
686                 amdgpu_program_register_sequence(adev,
687                                                  tonga_golden_common_all,
688                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
689                 break;
690         case CHIP_POLARIS11:
691                 amdgpu_program_register_sequence(adev,
692                                                  golden_settings_polaris11_a11,
693                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
694                 amdgpu_program_register_sequence(adev,
695                                                  polaris11_golden_common_all,
696                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
697                 break;
698         case CHIP_POLARIS10:
699                 amdgpu_program_register_sequence(adev,
700                                                  golden_settings_polaris10_a11,
701                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
702                 amdgpu_program_register_sequence(adev,
703                                                  polaris10_golden_common_all,
704                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
705                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
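                /*
                 * Board-specific quirk keyed on PCI subsystem IDs: on a few
                 * Polaris10 boards two bytes are programmed over the VBIOS
                 * I2C channel here; exactly what is being tuned is board
                 * firmware specific.
                 */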
706                 if (adev->pdev->revision == 0xc7 &&
707                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
708                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
709                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
710                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
711                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
712                 }
713                 break;
714         case CHIP_CARRIZO:
715                 amdgpu_program_register_sequence(adev,
716                                                  cz_mgcg_cgcg_init,
717                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
718                 amdgpu_program_register_sequence(adev,
719                                                  cz_golden_settings_a11,
720                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
721                 amdgpu_program_register_sequence(adev,
722                                                  cz_golden_common_all,
723                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
724                 break;
725         case CHIP_STONEY:
726                 amdgpu_program_register_sequence(adev,
727                                                  stoney_mgcg_cgcg_init,
728                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
729                 amdgpu_program_register_sequence(adev,
730                                                  stoney_golden_settings_a11,
731                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
732                 amdgpu_program_register_sequence(adev,
733                                                  stoney_golden_common_all,
734                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
735                 break;
736         default:
737                 break;
738         }
739 }
740
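/*
 * gfx_v8_0_scratch_init - mark the CP scratch registers as available.
 *
 * Seven registers starting at mmSCRATCH_REG0 are tracked; they are used by
 * the ring and IB tests below to verify that the CP executed a write.
 */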
741 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
742 {
743         int i;
744
745         adev->gfx.scratch.num_reg = 7;
746         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
747         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
748                 adev->gfx.scratch.free[i] = true;
749                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
750         }
751 }
752
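/*
 * gfx_v8_0_ring_test_ring - basic sanity test for a ring.
 *
 * Seeds a scratch register with 0xCAFEDEAD via MMIO, then submits a
 * 3-dword SET_UCONFIG_REG packet that writes 0xDEADBEEF to it and polls
 * (up to adev->usec_timeout) until the CP has processed the packet.
 */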
753 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
754 {
755         struct amdgpu_device *adev = ring->adev;
756         uint32_t scratch;
757         uint32_t tmp = 0;
758         unsigned i;
759         int r;
760
761         r = amdgpu_gfx_scratch_get(adev, &scratch);
762         if (r) {
763                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
764                 return r;
765         }
766         WREG32(scratch, 0xCAFEDEAD);
767         r = amdgpu_ring_alloc(ring, 3);
768         if (r) {
769                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
770                           ring->idx, r);
771                 amdgpu_gfx_scratch_free(adev, scratch);
772                 return r;
773         }
774         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
775         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
776         amdgpu_ring_write(ring, 0xDEADBEEF);
777         amdgpu_ring_commit(ring);
778
779         for (i = 0; i < adev->usec_timeout; i++) {
780                 tmp = RREG32(scratch);
781                 if (tmp == 0xDEADBEEF)
782                         break;
783                 DRM_UDELAY(1);
784         }
785         if (i < adev->usec_timeout) {
786                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
787                          ring->idx, i);
788         } else {
789                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
790                           ring->idx, scratch, tmp);
791                 r = -EINVAL;
792         }
793         amdgpu_gfx_scratch_free(adev, scratch);
794         return r;
795 }
796
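/*
 * gfx_v8_0_ring_test_ib - same check as the ring test, but routed through
 * an indirect buffer: the SET_UCONFIG_REG write is placed in an IB,
 * scheduled on the ring and the resulting fence is waited on, exercising
 * the full IB submission path.
 */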
797 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
798 {
799         struct amdgpu_device *adev = ring->adev;
800         struct amdgpu_ib ib;
801         struct fence *f = NULL;
802         uint32_t scratch;
803         uint32_t tmp = 0;
804         long r;
805
806         r = amdgpu_gfx_scratch_get(adev, &scratch);
807         if (r) {
808                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
809                 return r;
810         }
811         WREG32(scratch, 0xCAFEDEAD);
812         memset(&ib, 0, sizeof(ib));
813         r = amdgpu_ib_get(adev, NULL, 256, &ib);
814         if (r) {
815                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
816                 goto err1;
817         }
818         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
819         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
820         ib.ptr[2] = 0xDEADBEEF;
821         ib.length_dw = 3;
822
823         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
824         if (r)
825                 goto err2;
826
827         r = fence_wait_timeout(f, false, timeout);
828         if (r == 0) {
829                 DRM_ERROR("amdgpu: IB test timed out.\n");
830                 r = -ETIMEDOUT;
831                 goto err2;
832         } else if (r < 0) {
833                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
834                 goto err2;
835         }
836         tmp = RREG32(scratch);
837         if (tmp == 0xDEADBEEF) {
838                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
839                 r = 0;
840         } else {
841                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
842                           scratch, tmp);
843                 r = -EINVAL;
844         }
845 err2:
846         amdgpu_ib_free(adev, &ib, NULL);
847         fence_put(f);
848 err1:
849         amdgpu_gfx_scratch_free(adev, scratch);
850         return r;
851 }
852
853
854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
855         release_firmware(adev->gfx.pfp_fw);
856         adev->gfx.pfp_fw = NULL;
857         release_firmware(adev->gfx.me_fw);
858         adev->gfx.me_fw = NULL;
859         release_firmware(adev->gfx.ce_fw);
860         adev->gfx.ce_fw = NULL;
861         release_firmware(adev->gfx.rlc_fw);
862         adev->gfx.rlc_fw = NULL;
863         release_firmware(adev->gfx.mec_fw);
864         adev->gfx.mec_fw = NULL;
865         if ((adev->asic_type != CHIP_STONEY) &&
866             (adev->asic_type != CHIP_TOPAZ))
867                 release_firmware(adev->gfx.mec2_fw);
868         adev->gfx.mec2_fw = NULL;
869
870         kfree(adev->gfx.rlc.register_list_format);
871 }
872
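/*
 * gfx_v8_0_init_microcode - fetch and validate the GFX firmware images.
 *
 * Requests amdgpu/<chip>_{pfp,me,ce,rlc,mec}.bin (plus mec2 where it
 * exists; Topaz and Stoney have no MEC2), caches the version/feature
 * numbers and the RLC register lists, and, when the SMU loads firmware,
 * registers each image in adev->firmware.ucode[] for the SMU to upload.
 */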
873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
874 {
875         const char *chip_name;
876         char fw_name[30];
877         int err;
878         struct amdgpu_firmware_info *info = NULL;
879         const struct common_firmware_header *header = NULL;
880         const struct gfx_firmware_header_v1_0 *cp_hdr;
881         const struct rlc_firmware_header_v2_0 *rlc_hdr;
882         unsigned int *tmp = NULL, i;
883
884         DRM_DEBUG("\n");
885
886         switch (adev->asic_type) {
887         case CHIP_TOPAZ:
888                 chip_name = "topaz";
889                 break;
890         case CHIP_TONGA:
891                 chip_name = "tonga";
892                 break;
893         case CHIP_CARRIZO:
894                 chip_name = "carrizo";
895                 break;
896         case CHIP_FIJI:
897                 chip_name = "fiji";
898                 break;
899         case CHIP_POLARIS11:
900                 chip_name = "polaris11";
901                 break;
902         case CHIP_POLARIS10:
903                 chip_name = "polaris10";
904                 break;
905         case CHIP_STONEY:
906                 chip_name = "stoney";
907                 break;
908         default:
909                 BUG();
910         }
911
912         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
913         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
914         if (err)
915                 goto out;
916         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
917         if (err)
918                 goto out;
919         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
920         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
921         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
922
923         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
924         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
925         if (err)
926                 goto out;
927         err = amdgpu_ucode_validate(adev->gfx.me_fw);
928         if (err)
929                 goto out;
930         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
931         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
932         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
933
934         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
935         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
936         if (err)
937                 goto out;
938         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
939         if (err)
940                 goto out;
941         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
942         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
943         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
944
945         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
946         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
947         if (err)
948                 goto out;
949         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
950         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
951         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
952         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
953
954         adev->gfx.rlc.save_and_restore_offset =
955                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
956         adev->gfx.rlc.clear_state_descriptor_offset =
957                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
958         adev->gfx.rlc.avail_scratch_ram_locations =
959                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
960         adev->gfx.rlc.reg_restore_list_size =
961                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
962         adev->gfx.rlc.reg_list_format_start =
963                         le32_to_cpu(rlc_hdr->reg_list_format_start);
964         adev->gfx.rlc.reg_list_format_separate_start =
965                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
966         adev->gfx.rlc.starting_offsets_start =
967                         le32_to_cpu(rlc_hdr->starting_offsets_start);
968         adev->gfx.rlc.reg_list_format_size_bytes =
969                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
970         adev->gfx.rlc.reg_list_size_bytes =
971                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
972
973         adev->gfx.rlc.register_list_format =
974                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
975                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
976
977         if (!adev->gfx.rlc.register_list_format) {
978                 err = -ENOMEM;
979                 goto out;
980         }
981
982         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
984         for (i = 0; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
985                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
986
987         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
988
989         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
990                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
991         for (i = 0; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
992                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
993
994         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
995         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
996         if (err)
997                 goto out;
998         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
999         if (err)
1000                 goto out;
1001         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1002         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1003         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1004
1005         if ((adev->asic_type != CHIP_STONEY) &&
1006             (adev->asic_type != CHIP_TOPAZ)) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1009                 if (!err) {
1010                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1011                         if (err)
1012                                 goto out;
1013                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1014                                 adev->gfx.mec2_fw->data;
1015                         adev->gfx.mec2_fw_version =
1016                                 le32_to_cpu(cp_hdr->header.ucode_version);
1017                         adev->gfx.mec2_feature_version =
1018                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1019                 } else {
1020                         err = 0;
1021                         adev->gfx.mec2_fw = NULL;
1022                 }
1023         }
1024
1025         if (adev->firmware.smu_load) {
1026                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1027                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1028                 info->fw = adev->gfx.pfp_fw;
1029                 header = (const struct common_firmware_header *)info->fw->data;
1030                 adev->firmware.fw_size +=
1031                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032
1033                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1034                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1035                 info->fw = adev->gfx.me_fw;
1036                 header = (const struct common_firmware_header *)info->fw->data;
1037                 adev->firmware.fw_size +=
1038                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039
1040                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1041                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1042                 info->fw = adev->gfx.ce_fw;
1043                 header = (const struct common_firmware_header *)info->fw->data;
1044                 adev->firmware.fw_size +=
1045                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046
1047                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1048                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1049                 info->fw = adev->gfx.rlc_fw;
1050                 header = (const struct common_firmware_header *)info->fw->data;
1051                 adev->firmware.fw_size +=
1052                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1055                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1056                 info->fw = adev->gfx.mec_fw;
1057                 header = (const struct common_firmware_header *)info->fw->data;
1058                 adev->firmware.fw_size +=
1059                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1060
1061                 if (adev->gfx.mec2_fw) {
1062                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1063                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1064                         info->fw = adev->gfx.mec2_fw;
1065                         header = (const struct common_firmware_header *)info->fw->data;
1066                         adev->firmware.fw_size +=
1067                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1068                 }
1069
1070         }
1071
1072 out:
1073         if (err) {
1074                 dev_err(adev->dev,
1075                         "gfx8: Failed to load firmware \"%s\"\n",
1076                         fw_name);
1077                 release_firmware(adev->gfx.pfp_fw);
1078                 adev->gfx.pfp_fw = NULL;
1079                 release_firmware(adev->gfx.me_fw);
1080                 adev->gfx.me_fw = NULL;
1081                 release_firmware(adev->gfx.ce_fw);
1082                 adev->gfx.ce_fw = NULL;
1083                 release_firmware(adev->gfx.rlc_fw);
1084                 adev->gfx.rlc_fw = NULL;
1085                 release_firmware(adev->gfx.mec_fw);
1086                 adev->gfx.mec_fw = NULL;
1087                 release_firmware(adev->gfx.mec2_fw);
1088                 adev->gfx.mec2_fw = NULL;
1089         }
1090         return err;
1091 }
1092
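/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state buffer.
 *
 * Emits PREAMBLE begin, CONTEXT_CONTROL, all SECT_CONTEXT extents from
 * adev->gfx.rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG/_1 pair matching
 * the golden settings, then PREAMBLE end and CLEAR_STATE.
 */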
1093 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1094                                     volatile u32 *buffer)
1095 {
1096         u32 count = 0, i;
1097         const struct cs_section_def *sect = NULL;
1098         const struct cs_extent_def *ext = NULL;
1099
1100         if (adev->gfx.rlc.cs_data == NULL)
1101                 return;
1102         if (buffer == NULL)
1103                 return;
1104
1105         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1106         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1107
1108         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1109         buffer[count++] = cpu_to_le32(0x80000000);
1110         buffer[count++] = cpu_to_le32(0x80000000);
1111
1112         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1113                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1114                         if (sect->id == SECT_CONTEXT) {
1115                                 buffer[count++] =
1116                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1117                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1118                                                 PACKET3_SET_CONTEXT_REG_START);
1119                                 for (i = 0; i < ext->reg_count; i++)
1120                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1121                         } else {
1122                                 return;
1123                         }
1124                 }
1125         }
1126
1127         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1128         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1129                         PACKET3_SET_CONTEXT_REG_START);
1130         switch (adev->asic_type) {
1131         case CHIP_TONGA:
1132         case CHIP_POLARIS10:
1133                 buffer[count++] = cpu_to_le32(0x16000012);
1134                 buffer[count++] = cpu_to_le32(0x0000002A);
1135                 break;
1136         case CHIP_POLARIS11:
1137                 buffer[count++] = cpu_to_le32(0x16000012);
1138                 buffer[count++] = cpu_to_le32(0x00000000);
1139                 break;
1140         case CHIP_FIJI:
1141                 buffer[count++] = cpu_to_le32(0x3a00161a);
1142                 buffer[count++] = cpu_to_le32(0x0000002e);
1143                 break;
1144         case CHIP_TOPAZ:
1145         case CHIP_CARRIZO:
1146                 buffer[count++] = cpu_to_le32(0x00000002);
1147                 buffer[count++] = cpu_to_le32(0x00000000);
1148                 break;
1149         case CHIP_STONEY:
1150                 buffer[count++] = cpu_to_le32(0x00000000);
1151                 buffer[count++] = cpu_to_le32(0x00000000);
1152                 break;
1153         default:
1154                 buffer[count++] = cpu_to_le32(0x00000000);
1155                 buffer[count++] = cpu_to_le32(0x00000000);
1156                 break;
1157         }
1158
1159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1160         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1161
1162         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1163         buffer[count++] = cpu_to_le32(0);
1164 }
1165
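     /*
      * Copy the CP jump tables out of the CE/PFP/ME/MEC (and, on Carrizo,
      * MEC2) firmware images into the RLC cp_table BO allocated by
      * gfx_v8_0_rlc_init().
      */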
1166 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1167 {
1168         const __le32 *fw_data;
1169         volatile u32 *dst_ptr;
1170         int me, i, max_me = 4;
1171         u32 bo_offset = 0;
1172         u32 table_offset, table_size;
1173
1174         if (adev->asic_type == CHIP_CARRIZO)
1175                 max_me = 5;
1176
1177         /* write the cp table buffer */
1178         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1179         for (me = 0; me < max_me; me++) {
1180                 if (me == 0) {
1181                         const struct gfx_firmware_header_v1_0 *hdr =
1182                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1183                         fw_data = (const __le32 *)
1184                                 (adev->gfx.ce_fw->data +
1185                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1186                         table_offset = le32_to_cpu(hdr->jt_offset);
1187                         table_size = le32_to_cpu(hdr->jt_size);
1188                 } else if (me == 1) {
1189                         const struct gfx_firmware_header_v1_0 *hdr =
1190                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1191                         fw_data = (const __le32 *)
1192                                 (adev->gfx.pfp_fw->data +
1193                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1194                         table_offset = le32_to_cpu(hdr->jt_offset);
1195                         table_size = le32_to_cpu(hdr->jt_size);
1196                 } else if (me == 2) {
1197                         const struct gfx_firmware_header_v1_0 *hdr =
1198                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1199                         fw_data = (const __le32 *)
1200                                 (adev->gfx.me_fw->data +
1201                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1202                         table_offset = le32_to_cpu(hdr->jt_offset);
1203                         table_size = le32_to_cpu(hdr->jt_size);
1204                 } else if (me == 3) {
1205                         const struct gfx_firmware_header_v1_0 *hdr =
1206                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1207                         fw_data = (const __le32 *)
1208                                 (adev->gfx.mec_fw->data +
1209                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1210                         table_offset = le32_to_cpu(hdr->jt_offset);
1211                         table_size = le32_to_cpu(hdr->jt_size);
1212                 } else if (me == 4) {
1213                         const struct gfx_firmware_header_v1_0 *hdr =
1214                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1215                         fw_data = (const __le32 *)
1216                                 (adev->gfx.mec2_fw->data +
1217                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1218                         table_offset = le32_to_cpu(hdr->jt_offset);
1219                         table_size = le32_to_cpu(hdr->jt_size);
1220                 }
1221
1222                 for (i = 0; i < table_size; i++) {
1223                         dst_ptr[bo_offset + i] =
1224                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1225                 }
1226
1227                 bo_offset += table_size;
1228         }
1229 }
1230
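     /* Tear down the RLC BOs: the clear state buffer and the CP jump table. */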
1231 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1232 {
1233         int r;
1234
1235         /* clear state block */
1236         if (adev->gfx.rlc.clear_state_obj) {
1237                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1238                 if (unlikely(r != 0))
1239                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1240                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1241                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1242                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1243                 adev->gfx.rlc.clear_state_obj = NULL;
1244         }
1245
1246         /* jump table block */
1247         if (adev->gfx.rlc.cp_table_obj) {
1248                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1249                 if (unlikely(r != 0))
1250                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1251                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1252                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1253                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1254                 adev->gfx.rlc.cp_table_obj = NULL;
1255         }
1256 }
1257
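     /*
      * Allocate, pin and fill the RLC clear state buffer; on Carrizo/Stoney
      * also allocate the CP table BO (JT + GDS) and populate the jump
      * tables from the CP firmware.
      */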
1258 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1259 {
1260         volatile u32 *dst_ptr;
1261         u32 dws;
1262         const struct cs_section_def *cs_data;
1263         int r;
1264
1265         adev->gfx.rlc.cs_data = vi_cs_data;
1266
1267         cs_data = adev->gfx.rlc.cs_data;
1268
1269         if (cs_data) {
1270                 /* clear state block */
1271                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1272
1273                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1274                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1275                                              AMDGPU_GEM_DOMAIN_VRAM,
1276                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1277                                              NULL, NULL,
1278                                              &adev->gfx.rlc.clear_state_obj);
1279                         if (r) {
1280                                 dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
1281                                 gfx_v8_0_rlc_fini(adev);
1282                                 return r;
1283                         }
1284                 }
1285                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286                 if (unlikely(r != 0)) {
1287                         gfx_v8_0_rlc_fini(adev);
1288                         return r;
1289                 }
1290                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291                                   &adev->gfx.rlc.clear_state_gpu_addr);
1292                 if (r) {
1293                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295                         gfx_v8_0_rlc_fini(adev);
1296                         return r;
1297                 }
1298
1299                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300                 if (r) {
1301                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302                         gfx_v8_0_rlc_fini(adev);
1303                         return r;
1304                 }
1305                 /* set up the cs buffer */
1306                 dst_ptr = adev->gfx.rlc.cs_ptr;
1307                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317                                              AMDGPU_GEM_DOMAIN_VRAM,
1318                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1319                                              NULL, NULL,
1320                                              &adev->gfx.rlc.cp_table_obj);
1321                         if (r) {
1322                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1323                                 return r;
1324                         }
1325                 }
1326
1327                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1328                 if (unlikely(r != 0)) {
1329                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1330                         return r;
1331                 }
1332                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1333                                   &adev->gfx.rlc.cp_table_gpu_addr);
1334                 if (r) {
1335                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1336                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1337                         return r;
1338                 }
1339                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1340                 if (r) {
1341                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1342                         return r;
1343                 }
1344
1345                 cz_init_cp_jump_table(adev);
1346
1347                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1348                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1349         }
1350
1351         return 0;
1352 }
1353
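     /* Free the MEC HPD EOP BO. */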
1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1355 {
1356         int r;
1357
1358         if (adev->gfx.mec.hpd_eop_obj) {
1359                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1360                 if (unlikely(r != 0))
1361                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1362                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1363                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1365                 adev->gfx.mec.hpd_eop_obj = NULL;
1366         }
1367 }
1368
1369 #define MEC_HPD_SIZE 2048
1370
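     /*
      * Allocate, pin and clear the HPD EOP BO used by the compute queues.
      * Only one MEC/pipe is managed here; the other pipes belong to the KFD.
      */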
1371 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1372 {
1373         int r;
1374         u32 *hpd;
1375
1376         /*
1377          * we assign only 1 pipe because all other pipes will
1378          * be handled by KFD
1379          */
1380         adev->gfx.mec.num_mec = 1;
1381         adev->gfx.mec.num_pipe = 1;
1382         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1383
1384         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1385                 r = amdgpu_bo_create(adev,
1386                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1387                                      PAGE_SIZE, true,
1388                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1389                                      &adev->gfx.mec.hpd_eop_obj);
1390                 if (r) {
1391                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1392                         return r;
1393                 }
1394         }
1395
1396         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1397         if (unlikely(r != 0)) {
1398                 gfx_v8_0_mec_fini(adev);
1399                 return r;
1400         }
1401         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1402                           &adev->gfx.mec.hpd_eop_gpu_addr);
1403         if (r) {
1404                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1405                 gfx_v8_0_mec_fini(adev);
1406                 return r;
1407         }
1408         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1409         if (r) {
1410                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1411                 gfx_v8_0_mec_fini(adev);
1412                 return r;
1413         }
1414
1415         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1416
1417         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1418         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1419
1420         return 0;
1421 }
1422
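     /*
      * Raw GCN machine code for the compute shader that
      * gfx_v8_0_do_edc_gpr_workarounds() dispatches to initialize the VGPRs
      * for the Carrizo EDC workaround.
      */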
1423 static const u32 vgpr_init_compute_shader[] =
1424 {
1425         0x7e000209, 0x7e020208,
1426         0x7e040207, 0x7e060206,
1427         0x7e080205, 0x7e0a0204,
1428         0x7e0c0203, 0x7e0e0202,
1429         0x7e100201, 0x7e120200,
1430         0x7e140209, 0x7e160208,
1431         0x7e180207, 0x7e1a0206,
1432         0x7e1c0205, 0x7e1e0204,
1433         0x7e200203, 0x7e220202,
1434         0x7e240201, 0x7e260200,
1435         0x7e280209, 0x7e2a0208,
1436         0x7e2c0207, 0x7e2e0206,
1437         0x7e300205, 0x7e320204,
1438         0x7e340203, 0x7e360202,
1439         0x7e380201, 0x7e3a0200,
1440         0x7e3c0209, 0x7e3e0208,
1441         0x7e400207, 0x7e420206,
1442         0x7e440205, 0x7e460204,
1443         0x7e480203, 0x7e4a0202,
1444         0x7e4c0201, 0x7e4e0200,
1445         0x7e500209, 0x7e520208,
1446         0x7e540207, 0x7e560206,
1447         0x7e580205, 0x7e5a0204,
1448         0x7e5c0203, 0x7e5e0202,
1449         0x7e600201, 0x7e620200,
1450         0x7e640209, 0x7e660208,
1451         0x7e680207, 0x7e6a0206,
1452         0x7e6c0205, 0x7e6e0204,
1453         0x7e700203, 0x7e720202,
1454         0x7e740201, 0x7e760200,
1455         0x7e780209, 0x7e7a0208,
1456         0x7e7c0207, 0x7e7e0206,
1457         0xbf8a0000, 0xbf810000,
1458 };
1459
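     /* As above, but for initializing the SGPRs. */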
1460 static const u32 sgpr_init_compute_shader[] =
1461 {
1462         0xbe8a0100, 0xbe8c0102,
1463         0xbe8e0104, 0xbe900106,
1464         0xbe920108, 0xbe940100,
1465         0xbe960102, 0xbe980104,
1466         0xbe9a0106, 0xbe9c0108,
1467         0xbe9e0100, 0xbea00102,
1468         0xbea20104, 0xbea40106,
1469         0xbea60108, 0xbea80100,
1470         0xbeaa0102, 0xbeac0104,
1471         0xbeae0106, 0xbeb00108,
1472         0xbeb20100, 0xbeb40102,
1473         0xbeb60104, 0xbeb80106,
1474         0xbeba0108, 0xbebc0100,
1475         0xbebe0102, 0xbec00104,
1476         0xbec20106, 0xbec40108,
1477         0xbec60100, 0xbec80102,
1478         0xbee60004, 0xbee70005,
1479         0xbeea0006, 0xbeeb0007,
1480         0xbee80008, 0xbee90009,
1481         0xbefc0000, 0xbf8a0000,
1482         0xbf810000, 0x00000000,
1483 };
1484
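     /*
      * Register/value pairs, written via PACKET3_SET_SH_REG, that describe
      * the dispatch state for the VGPR and SGPR init shaders above.
      */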
1485 static const u32 vgpr_init_regs[] =
1486 {
1487         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1488         mmCOMPUTE_RESOURCE_LIMITS, 0,
1489         mmCOMPUTE_NUM_THREAD_X, 256*4,
1490         mmCOMPUTE_NUM_THREAD_Y, 1,
1491         mmCOMPUTE_NUM_THREAD_Z, 1,
1492         mmCOMPUTE_PGM_RSRC2, 20,
1493         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1494         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1495         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1496         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1497         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1498         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1499         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1500         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1501         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1502         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1503 };
1504
1505 static const u32 sgpr1_init_regs[] =
1506 {
1507         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1508         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1509         mmCOMPUTE_NUM_THREAD_X, 256*5,
1510         mmCOMPUTE_NUM_THREAD_Y, 1,
1511         mmCOMPUTE_NUM_THREAD_Z, 1,
1512         mmCOMPUTE_PGM_RSRC2, 20,
1513         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1514         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1515         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1516         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1517         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1518         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1519         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1520         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1521         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1522         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1523 };
1524
1525 static const u32 sgpr2_init_regs[] =
1526 {
1527         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1528         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1529         mmCOMPUTE_NUM_THREAD_X, 256*5,
1530         mmCOMPUTE_NUM_THREAD_Y, 1,
1531         mmCOMPUTE_NUM_THREAD_Z, 1,
1532         mmCOMPUTE_PGM_RSRC2, 20,
1533         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1534         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1535         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1536         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1537         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1538         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1539         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1540         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1541         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1542         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1543 };
1544
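     /*
      * EDC SEC/DED counter registers, read back at the end of the GPR
      * workaround to clear the counters.
      */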
1545 static const u32 sec_ded_counter_registers[] =
1546 {
1547         mmCPC_EDC_ATC_CNT,
1548         mmCPC_EDC_SCRATCH_CNT,
1549         mmCPC_EDC_UCODE_CNT,
1550         mmCPF_EDC_ATC_CNT,
1551         mmCPF_EDC_ROQ_CNT,
1552         mmCPF_EDC_TAG_CNT,
1553         mmCPG_EDC_ATC_CNT,
1554         mmCPG_EDC_DMA_CNT,
1555         mmCPG_EDC_TAG_CNT,
1556         mmDC_EDC_CSINVOC_CNT,
1557         mmDC_EDC_RESTORE_CNT,
1558         mmDC_EDC_STATE_CNT,
1559         mmGDS_EDC_CNT,
1560         mmGDS_EDC_GRBM_CNT,
1561         mmGDS_EDC_OA_DED,
1562         mmSPI_EDC_CNT,
1563         mmSQC_ATC_EDC_GATCL1_CNT,
1564         mmSQC_EDC_CNT,
1565         mmSQ_EDC_DED_CNT,
1566         mmSQ_EDC_INFO,
1567         mmSQ_EDC_SEC_CNT,
1568         mmTCC_EDC_CNT,
1569         mmTCP_ATC_EDC_GATCL1_CNT,
1570         mmTCP_EDC_CNT,
1571         mmTD_EDC_CNT
1572 };
1573
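     /*
      * Carrizo-only EDC workaround: build a single IB that runs the VGPR
      * init shader and two SGPR init passes, wait for it to complete, then
      * enable DED_MODE/PROP_FED in GB_EDC_MODE and read the EDC counter
      * registers back to clear them.
      */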
1574 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1575 {
1576         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1577         struct amdgpu_ib ib;
1578         struct fence *f = NULL;
1579         int r, i;
1580         u32 tmp;
1581         unsigned total_size, vgpr_offset, sgpr_offset;
1582         u64 gpu_addr;
1583
1584         /* only supported on CZ */
1585         if (adev->asic_type != CHIP_CARRIZO)
1586                 return 0;
1587
1588         /* bail if the compute ring is not ready */
1589         if (!ring->ready)
1590                 return 0;
1591
1592         tmp = RREG32(mmGB_EDC_MODE);
1593         WREG32(mmGB_EDC_MODE, 0);
1594
1595         total_size =
1596                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1597         total_size +=
1598                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1599         total_size +=
1600                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1601         total_size = ALIGN(total_size, 256);
1602         vgpr_offset = total_size;
1603         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1604         sgpr_offset = total_size;
1605         total_size += sizeof(sgpr_init_compute_shader);
1606
1607         /* allocate an indirect buffer to put the commands in */
1608         memset(&ib, 0, sizeof(ib));
1609         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1610         if (r) {
1611                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1612                 return r;
1613         }
1614
1615         /* load the compute shaders */
1616         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1617                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1618
1619         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1620                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1621
1622         /* init the ib length to 0 */
1623         ib.length_dw = 0;
1624
1625         /* VGPR */
1626         /* write the register state for the compute dispatch */
1627         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1628                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1630                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1631         }
1632         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1634         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638
1639         /* write dispatch packet */
1640         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641         ib.ptr[ib.length_dw++] = 8; /* x */
1642         ib.ptr[ib.length_dw++] = 1; /* y */
1643         ib.ptr[ib.length_dw++] = 1; /* z */
1644         ib.ptr[ib.length_dw++] =
1645                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646
1647         /* write CS partial flush packet */
1648         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650
1651         /* SGPR1 */
1652         /* write the register state for the compute dispatch */
1653         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1654                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1656                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1657         }
1658         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665         /* write dispatch packet */
1666         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667         ib.ptr[ib.length_dw++] = 8; /* x */
1668         ib.ptr[ib.length_dw++] = 1; /* y */
1669         ib.ptr[ib.length_dw++] = 1; /* z */
1670         ib.ptr[ib.length_dw++] =
1671                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673         /* write CS partial flush packet */
1674         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677         /* SGPR2 */
1678         /* write the register state for the compute dispatch */
1679         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1680                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1681                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1682                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1683         }
1684         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1685         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1686         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1687         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1688         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1689         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1690
1691         /* write dispatch packet */
1692         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1693         ib.ptr[ib.length_dw++] = 8; /* x */
1694         ib.ptr[ib.length_dw++] = 1; /* y */
1695         ib.ptr[ib.length_dw++] = 1; /* z */
1696         ib.ptr[ib.length_dw++] =
1697                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1698
1699         /* write CS partial flush packet */
1700         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1701         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1702
1703         /* schedule the ib on the ring */
1704         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1705         if (r) {
1706                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1707                 goto fail;
1708         }
1709
1710         /* wait for the GPU to finish processing the IB */
1711         r = fence_wait(f, false);
1712         if (r) {
1713                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1714                 goto fail;
1715         }
1716
1717         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1718         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1719         WREG32(mmGB_EDC_MODE, tmp);
1720
1721         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1722         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1723         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1724
1725
1726         /* read back registers to clear the counters */
1727         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1728                 RREG32(sec_ded_counter_registers[i]);
1729
1730 fail:
1731         amdgpu_ib_free(adev, &ib, NULL);
1732         fence_put(f);
1733
1734         return r;
1735 }
1736
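     /*
      * Fill in the per-ASIC gfx configuration (shader engines, CUs, tile
      * pipes, FIFO sizes, GB_ADDR_CONFIG) and derive the memory row size
      * from the MC/fuse registers.
      */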
1737 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1738 {
1739         u32 gb_addr_config;
1740         u32 mc_shared_chmap, mc_arb_ramcfg;
1741         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1742         u32 tmp;
1743         int ret;
1744
1745         switch (adev->asic_type) {
1746         case CHIP_TOPAZ:
1747                 adev->gfx.config.max_shader_engines = 1;
1748                 adev->gfx.config.max_tile_pipes = 2;
1749                 adev->gfx.config.max_cu_per_sh = 6;
1750                 adev->gfx.config.max_sh_per_se = 1;
1751                 adev->gfx.config.max_backends_per_se = 2;
1752                 adev->gfx.config.max_texture_channel_caches = 2;
1753                 adev->gfx.config.max_gprs = 256;
1754                 adev->gfx.config.max_gs_threads = 32;
1755                 adev->gfx.config.max_hw_contexts = 8;
1756
1757                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1758                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1759                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1760                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1761                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1762                 break;
1763         case CHIP_FIJI:
1764                 adev->gfx.config.max_shader_engines = 4;
1765                 adev->gfx.config.max_tile_pipes = 16;
1766                 adev->gfx.config.max_cu_per_sh = 16;
1767                 adev->gfx.config.max_sh_per_se = 1;
1768                 adev->gfx.config.max_backends_per_se = 4;
1769                 adev->gfx.config.max_texture_channel_caches = 16;
1770                 adev->gfx.config.max_gprs = 256;
1771                 adev->gfx.config.max_gs_threads = 32;
1772                 adev->gfx.config.max_hw_contexts = 8;
1773
1774                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1775                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1776                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1777                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1778                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1779                 break;
1780         case CHIP_POLARIS11:
1781                 ret = amdgpu_atombios_get_gfx_info(adev);
1782                 if (ret)
1783                         return ret;
1784                 adev->gfx.config.max_gprs = 256;
1785                 adev->gfx.config.max_gs_threads = 32;
1786                 adev->gfx.config.max_hw_contexts = 8;
1787
1788                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1789                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1790                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1791                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1792                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1793                 break;
1794         case CHIP_POLARIS10:
1795                 ret = amdgpu_atombios_get_gfx_info(adev);
1796                 if (ret)
1797                         return ret;
1798                 adev->gfx.config.max_gprs = 256;
1799                 adev->gfx.config.max_gs_threads = 32;
1800                 adev->gfx.config.max_hw_contexts = 8;
1801
1802                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1803                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1804                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1805                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1806                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1807                 break;
1808         case CHIP_TONGA:
1809                 adev->gfx.config.max_shader_engines = 4;
1810                 adev->gfx.config.max_tile_pipes = 8;
1811                 adev->gfx.config.max_cu_per_sh = 8;
1812                 adev->gfx.config.max_sh_per_se = 1;
1813                 adev->gfx.config.max_backends_per_se = 2;
1814                 adev->gfx.config.max_texture_channel_caches = 8;
1815                 adev->gfx.config.max_gprs = 256;
1816                 adev->gfx.config.max_gs_threads = 32;
1817                 adev->gfx.config.max_hw_contexts = 8;
1818
1819                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1820                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1821                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1822                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1823                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1824                 break;
1825         case CHIP_CARRIZO:
1826                 adev->gfx.config.max_shader_engines = 1;
1827                 adev->gfx.config.max_tile_pipes = 2;
1828                 adev->gfx.config.max_sh_per_se = 1;
1829                 adev->gfx.config.max_backends_per_se = 2;
1830
1831                 switch (adev->pdev->revision) {
1832                 case 0xc4:
1833                 case 0x84:
1834                 case 0xc8:
1835                 case 0xcc:
1836                 case 0xe1:
1837                 case 0xe3:
1838                         /* B10 */
1839                         adev->gfx.config.max_cu_per_sh = 8;
1840                         break;
1841                 case 0xc5:
1842                 case 0x81:
1843                 case 0x85:
1844                 case 0xc9:
1845                 case 0xcd:
1846                 case 0xe2:
1847                 case 0xe4:
1848                         /* B8 */
1849                         adev->gfx.config.max_cu_per_sh = 6;
1850                         break;
1851                 case 0xc6:
1852                 case 0xca:
1853                 case 0xce:
1854                 case 0x88:
1855                         /* B6 */
1856                         adev->gfx.config.max_cu_per_sh = 6;
1857                         break;
1858                 case 0xc7:
1859                 case 0x87:
1860                 case 0xcb:
1861                 case 0xe5:
1862                 case 0x89:
1863                 default:
1864                         /* B4 */
1865                         adev->gfx.config.max_cu_per_sh = 4;
1866                         break;
1867                 }
1868
1869                 adev->gfx.config.max_texture_channel_caches = 2;
1870                 adev->gfx.config.max_gprs = 256;
1871                 adev->gfx.config.max_gs_threads = 32;
1872                 adev->gfx.config.max_hw_contexts = 8;
1873
1874                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1875                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1876                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1877                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1878                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1879                 break;
1880         case CHIP_STONEY:
1881                 adev->gfx.config.max_shader_engines = 1;
1882                 adev->gfx.config.max_tile_pipes = 2;
1883                 adev->gfx.config.max_sh_per_se = 1;
1884                 adev->gfx.config.max_backends_per_se = 1;
1885
1886                 switch (adev->pdev->revision) {
1887                 case 0xc0:
1888                 case 0xc1:
1889                 case 0xc2:
1890                 case 0xc4:
1891                 case 0xc8:
1892                 case 0xc9:
1893                         adev->gfx.config.max_cu_per_sh = 3;
1894                         break;
1895                 case 0xd0:
1896                 case 0xd1:
1897                 case 0xd2:
1898                 default:
1899                         adev->gfx.config.max_cu_per_sh = 2;
1900                         break;
1901                 }
1902
1903                 adev->gfx.config.max_texture_channel_caches = 2;
1904                 adev->gfx.config.max_gprs = 256;
1905                 adev->gfx.config.max_gs_threads = 16;
1906                 adev->gfx.config.max_hw_contexts = 8;
1907
1908                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1909                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1910                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1911                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1912                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1913                 break;
1914         default:
1915                 adev->gfx.config.max_shader_engines = 2;
1916                 adev->gfx.config.max_tile_pipes = 4;
1917                 adev->gfx.config.max_cu_per_sh = 2;
1918                 adev->gfx.config.max_sh_per_se = 1;
1919                 adev->gfx.config.max_backends_per_se = 2;
1920                 adev->gfx.config.max_texture_channel_caches = 4;
1921                 adev->gfx.config.max_gprs = 256;
1922                 adev->gfx.config.max_gs_threads = 32;
1923                 adev->gfx.config.max_hw_contexts = 8;
1924
1925                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1926                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1927                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1928                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1929                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1930                 break;
1931         }
1932
1933         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1934         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1935         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1936
1937         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1938         adev->gfx.config.mem_max_burst_length_bytes = 256;
1939         if (adev->flags & AMD_IS_APU) {
1940                 /* Get memory bank mapping mode. */
1941                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1942                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1943                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1944
1945                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1946                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1947                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1948
1949                 /* Validate settings in case only one DIMM is installed. */
1950                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1951                         dimm00_addr_map = 0;
1952                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1953                         dimm01_addr_map = 0;
1954                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1955                         dimm10_addr_map = 0;
1956                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1957                         dimm11_addr_map = 0;
1958
1959                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1960                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1961                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1962                         adev->gfx.config.mem_row_size_in_kb = 2;
1963                 else
1964                         adev->gfx.config.mem_row_size_in_kb = 1;
1965         } else {
1966                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1967                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1968                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1969                         adev->gfx.config.mem_row_size_in_kb = 4;
1970         }
1971
1972         adev->gfx.config.shader_engine_tile_size = 32;
1973         adev->gfx.config.num_gpus = 1;
1974         adev->gfx.config.multi_gpu_tile_size = 64;
1975
1976         /* fix up row size */
1977         switch (adev->gfx.config.mem_row_size_in_kb) {
1978         case 1:
1979         default:
1980                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1981                 break;
1982         case 2:
1983                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1984                 break;
1985         case 4:
1986                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1987                 break;
1988         }
1989         adev->gfx.config.gb_addr_config = gb_addr_config;
1990
1991         return 0;
1992 }
1993
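     /*
      * sw_init: register the EOP/priv-reg/priv-inst interrupt sources, load
      * the microcode, set up the RLC and MEC BOs, create the GFX and compute
      * rings, and reserve the GDS/GWS/OA partitions.
      */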
1994 static int gfx_v8_0_sw_init(void *handle)
1995 {
1996         int i, r;
1997         struct amdgpu_ring *ring;
1998         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1999
2000         /* EOP Event */
2001         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2002         if (r)
2003                 return r;
2004
2005         /* Privileged reg */
2006         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2007         if (r)
2008                 return r;
2009
2010         /* Privileged inst */
2011         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2012         if (r)
2013                 return r;
2014
2015         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2016
2017         gfx_v8_0_scratch_init(adev);
2018
2019         r = gfx_v8_0_init_microcode(adev);
2020         if (r) {
2021                 DRM_ERROR("Failed to load gfx firmware!\n");
2022                 return r;
2023         }
2024
2025         r = gfx_v8_0_rlc_init(adev);
2026         if (r) {
2027                 DRM_ERROR("Failed to init rlc BOs!\n");
2028                 return r;
2029         }
2030
2031         r = gfx_v8_0_mec_init(adev);
2032         if (r) {
2033                 DRM_ERROR("Failed to init MEC BOs!\n");
2034                 return r;
2035         }
2036
2037         /* set up the gfx ring */
2038         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2039                 ring = &adev->gfx.gfx_ring[i];
2040                 ring->ring_obj = NULL;
2041                 sprintf(ring->name, "gfx");
2042                 /* no gfx doorbells on iceland */
2043                 if (adev->asic_type != CHIP_TOPAZ) {
2044                         ring->use_doorbell = true;
2045                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2046                 }
2047
2048                 r = amdgpu_ring_init(adev, ring, 1024,
2049                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2050                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2051                                      AMDGPU_RING_TYPE_GFX);
2052                 if (r)
2053                         return r;
2054         }
2055
2056         /* set up the compute queues */
2057         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2058                 unsigned irq_type;
2059
2060                 /* max 32 queues per MEC */
2061                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2062                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2063                         break;
2064                 }
2065                 ring = &adev->gfx.compute_ring[i];
2066                 ring->ring_obj = NULL;
2067                 ring->use_doorbell = true;
2068                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2069                 ring->me = 1; /* first MEC */
2070                 ring->pipe = i / 8;
2071                 ring->queue = i % 8;
2072                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2073                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2074                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2075                 r = amdgpu_ring_init(adev, ring, 1024,
2076                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2077                                      &adev->gfx.eop_irq, irq_type,
2078                                      AMDGPU_RING_TYPE_COMPUTE);
2079                 if (r)
2080                         return r;
2081         }
2082
2083         /* reserve GDS, GWS and OA resource for gfx */
2084         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2085                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2086                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2087         if (r)
2088                 return r;
2089
2090         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2091                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2092                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2093         if (r)
2094                 return r;
2095
2096         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2097                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2098                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2099         if (r)
2100                 return r;
2101
2102         adev->gfx.ce_ram_size = 0x8000;
2103
2104         r = gfx_v8_0_gpu_early_init(adev);
2105         if (r)
2106                 return r;
2107
2108         return 0;
2109 }
2110
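     /* sw_fini: undo sw_init - free the GDS BOs, rings, MEC/RLC BOs and microcode. */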
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113         int i;
2114         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115
2116         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2117         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2118         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2119
2120         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124
2125         gfx_v8_0_mec_fini(adev);
2126         gfx_v8_0_rlc_fini(adev);
2127         gfx_v8_0_free_microcode(adev);
2128
2129         return 0;
2130 }
2131
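     /*
      * Program the GB_TILE_MODE* and GB_MACROTILE_MODE* registers with the
      * per-ASIC tiling tables and keep a copy in adev->gfx.config.
      */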
2132 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2133 {
2134         uint32_t *modearray, *mod2array;
2135         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2136         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2137         u32 reg_offset;
2138
2139         modearray = adev->gfx.config.tile_mode_array;
2140         mod2array = adev->gfx.config.macrotile_mode_array;
2141
2142         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2143                 modearray[reg_offset] = 0;
2144
2145         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2146                 mod2array[reg_offset] = 0;
2147
2148         switch (adev->asic_type) {
2149         case CHIP_TOPAZ:
2150                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2151                                 PIPE_CONFIG(ADDR_SURF_P2) |
2152                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2153                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2154                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2155                                 PIPE_CONFIG(ADDR_SURF_P2) |
2156                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2157                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2158                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2159                                 PIPE_CONFIG(ADDR_SURF_P2) |
2160                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2161                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2162                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2163                                 PIPE_CONFIG(ADDR_SURF_P2) |
2164                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2165                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2166                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2167                                 PIPE_CONFIG(ADDR_SURF_P2) |
2168                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2169                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2170                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2171                                 PIPE_CONFIG(ADDR_SURF_P2) |
2172                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2173                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2174                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2175                                 PIPE_CONFIG(ADDR_SURF_P2) |
2176                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2177                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2178                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2179                                 PIPE_CONFIG(ADDR_SURF_P2));
2180                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2183                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2188                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2192                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2200                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2208                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2212                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2213                                  PIPE_CONFIG(ADDR_SURF_P2) |
2214                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2215                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2216                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2217                                  PIPE_CONFIG(ADDR_SURF_P2) |
2218                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2219                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2220                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2221                                  PIPE_CONFIG(ADDR_SURF_P2) |
2222                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2223                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2224                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2225                                  PIPE_CONFIG(ADDR_SURF_P2) |
2226                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2227                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2228                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2229                                  PIPE_CONFIG(ADDR_SURF_P2) |
2230                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2231                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2232                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2233                                  PIPE_CONFIG(ADDR_SURF_P2) |
2234                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2235                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2236                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2237                                  PIPE_CONFIG(ADDR_SURF_P2) |
2238                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2239                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2240                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2241                                  PIPE_CONFIG(ADDR_SURF_P2) |
2242                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2243                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2244                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                  PIPE_CONFIG(ADDR_SURF_P2) |
2246                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2247                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2248                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249                                  PIPE_CONFIG(ADDR_SURF_P2) |
2250                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2251                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2252
2253                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2254                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2255                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256                                 NUM_BANKS(ADDR_SURF_8_BANK));
2257                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2258                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2259                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2260                                 NUM_BANKS(ADDR_SURF_8_BANK));
2261                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2262                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2263                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264                                 NUM_BANKS(ADDR_SURF_8_BANK));
2265                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2267                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268                                 NUM_BANKS(ADDR_SURF_8_BANK));
2269                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2270                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2271                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2272                                 NUM_BANKS(ADDR_SURF_8_BANK));
2273                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2276                                 NUM_BANKS(ADDR_SURF_8_BANK));
2277                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2279                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2280                                 NUM_BANKS(ADDR_SURF_8_BANK));
2281                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2282                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2283                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2284                                 NUM_BANKS(ADDR_SURF_16_BANK));
2285                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2286                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288                                 NUM_BANKS(ADDR_SURF_16_BANK));
2289                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2290                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2291                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2292                                  NUM_BANKS(ADDR_SURF_16_BANK));
2293                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2294                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2295                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2296                                  NUM_BANKS(ADDR_SURF_16_BANK));
2297                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2299                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2300                                  NUM_BANKS(ADDR_SURF_16_BANK));
2301                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2304                                  NUM_BANKS(ADDR_SURF_16_BANK));
2305                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2306                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2307                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2308                                  NUM_BANKS(ADDR_SURF_8_BANK));
2309
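                /*
                 * Program the tile and macrotile mode registers for this
                 * case.  Entries that were not assigned above (tile modes
                 * 7, 12, 17 and 23, macrotile mode 7) are skipped and keep
                 * whatever value the hardware already holds.
                 */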
2310                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2311                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2312                             reg_offset != 23)
2313                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2314
2315                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2316                         if (reg_offset != 7)
2317                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2318
2319                 break;
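        /*
         * Fiji: each modearray[] entry below ORs together the ARRAY_MODE,
         * PIPE_CONFIG (ADDR_SURF_P16_32x32_16x16, i.e. a 16-pipe layout),
         * TILE_SPLIT/SAMPLE_SPLIT and MICRO_TILE_MODE_NEW fields for one
         * GB_TILE_MODE register; mod2array[] holds the matching
         * GB_MACROTILE_MODE bank-geometry values.
         */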
2320         case CHIP_FIJI:
2321                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2324                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2325                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2326                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2328                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2329                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2330                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2331                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2332                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2335                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2336                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2337                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2338                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2339                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2340                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2341                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2342                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2343                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2344                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2345                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2346                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2348                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2349                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2350                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2351                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2352                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2353                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2354                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2355                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2356                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2357                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2359                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2360                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2363                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2364                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2367                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2368                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2371                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2372                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2376                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2380                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2387                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2388                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2389                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2390                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2391                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2392                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2393                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2394                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2395                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2396                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2398                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2399                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2400                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2402                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2403                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2404                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2406                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2407                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2408                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2410                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2411                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2412                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2414                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2415                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2416                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2419                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2420                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2422                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2423                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2424                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2426                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2427                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2443
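                /*
                 * Macrotile (bank) geometry: bank width, bank height,
                 * macro tile aspect ratio and number of banks for each
                 * macrotile mode index.
                 */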
2444                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                 NUM_BANKS(ADDR_SURF_8_BANK));
2448                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2451                                 NUM_BANKS(ADDR_SURF_8_BANK));
2452                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2454                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2455                                 NUM_BANKS(ADDR_SURF_8_BANK));
2456                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2458                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2459                                 NUM_BANKS(ADDR_SURF_8_BANK));
2460                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463                                 NUM_BANKS(ADDR_SURF_8_BANK));
2464                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2467                                 NUM_BANKS(ADDR_SURF_8_BANK));
2468                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                 NUM_BANKS(ADDR_SURF_8_BANK));
2472                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2474                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2475                                 NUM_BANKS(ADDR_SURF_8_BANK));
2476                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2478                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479                                 NUM_BANKS(ADDR_SURF_8_BANK));
2480                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                  NUM_BANKS(ADDR_SURF_8_BANK));
2484                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                                  NUM_BANKS(ADDR_SURF_8_BANK));
2488                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2490                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2491                                  NUM_BANKS(ADDR_SURF_8_BANK));
2492                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2494                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                  NUM_BANKS(ADDR_SURF_8_BANK));
2496                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2498                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2499                                  NUM_BANKS(ADDR_SURF_4_BANK));
2500
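                /*
                 * Unlike the previous case, every GB_TILE_MODE register is
                 * written here; only GB_MACROTILE_MODE7 is left untouched.
                 */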
2501                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2502                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2503
2504                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2505                         if (reg_offset != 7)
2506                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2507
2508                 break;
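        /*
         * Tonga: same table layout as Fiji, but using the 8-pipe
         * ADDR_SURF_P8_32x32_16x16 pipe configuration.
         */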
2509         case CHIP_TONGA:
2510                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2513                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2514                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2517                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2518                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2519                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2521                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2522                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2523                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2525                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2526                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2527                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2529                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2530                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2531                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2533                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2534                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2535                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2536                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2537                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2538                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2539                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2540                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2541                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2542                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2543                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2544                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2545                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2546                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2547                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2553                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2556                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2557                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2558                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2560                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2561                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2563                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2564                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2566                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2567                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2569                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2571                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2573                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2574                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2575                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2576                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2577                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2578                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2579                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2580                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2581                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2583                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2584                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2585                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2586                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2587                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2588                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2589                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2591                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2592                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2593                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2595                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2596                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2597                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2599                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2600                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2601                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2602                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2603                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2604                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2605                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2607                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2608                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2609                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2611                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2612                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2613                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2615                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2616                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2617                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2619                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2623                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2626                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2627                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2629                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2630                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2631                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2632
2633                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2634                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2635                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2636                                 NUM_BANKS(ADDR_SURF_16_BANK));
2637                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2638                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2639                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2640                                 NUM_BANKS(ADDR_SURF_16_BANK));
2641                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2643                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2644                                 NUM_BANKS(ADDR_SURF_16_BANK));
2645                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2647                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2648                                 NUM_BANKS(ADDR_SURF_16_BANK));
2649                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2650                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2651                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2652                                 NUM_BANKS(ADDR_SURF_16_BANK));
2653                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2654                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2655                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2656                                 NUM_BANKS(ADDR_SURF_16_BANK));
2657                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2658                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2659                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2660                                 NUM_BANKS(ADDR_SURF_16_BANK));
2661                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2662                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2663                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2664                                 NUM_BANKS(ADDR_SURF_16_BANK));
2665                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2667                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2668                                 NUM_BANKS(ADDR_SURF_16_BANK));
2669                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2670                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2671                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2672                                  NUM_BANKS(ADDR_SURF_16_BANK));
2673                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2675                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2679                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2680                                  NUM_BANKS(ADDR_SURF_8_BANK));
2681                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2683                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2684                                  NUM_BANKS(ADDR_SURF_4_BANK));
2685                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2688                                  NUM_BANKS(ADDR_SURF_4_BANK));
2689
2690                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2691                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2692
2693                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2694                         if (reg_offset != 7)
2695                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2696
2697                 break;
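        /*
         * Polaris11: 4-pipe ADDR_SURF_P4_16x16 pipe configuration for
         * every tile mode entry.
         */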
2698         case CHIP_POLARIS11:
2699                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2703                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2706                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2710                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2711                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2714                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2715                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2718                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2719                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2722                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2723                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2726                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2727                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2730                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2733                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2736                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2740                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2742                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2744                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2745                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2748                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2750                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2752                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2753                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2756                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2757                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2758                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2760                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2761                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2762                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2764                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2765                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2766                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2768                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2769                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2770                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2773                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2774                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2776                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2778                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2782                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2784                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2785                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2786                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2788                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2789                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2790                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2792                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2793                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2794                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2796                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2797                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2798                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2800                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2802                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2806                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2808                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2809                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2810                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2814                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2817                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2821
2822                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2823                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2824                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2825                                 NUM_BANKS(ADDR_SURF_16_BANK));
2826
2827                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830                                 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2839                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840                                 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2844                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2845                                 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850                                 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                 NUM_BANKS(ADDR_SURF_16_BANK));
2856
2857                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2858                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2859                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860                                 NUM_BANKS(ADDR_SURF_16_BANK));
2861
2862                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2863                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2864                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865                                 NUM_BANKS(ADDR_SURF_16_BANK));
2866
2867                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2868                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2869                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870                                 NUM_BANKS(ADDR_SURF_16_BANK));
2871
2872                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                 NUM_BANKS(ADDR_SURF_16_BANK));
2876
2877                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2879                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2880                                 NUM_BANKS(ADDR_SURF_16_BANK));
2881
2882                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2885                                 NUM_BANKS(ADDR_SURF_8_BANK));
2886
2887                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2889                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2890                                 NUM_BANKS(ADDR_SURF_4_BANK));
2891
2892                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2893                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2894
2895                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2896                         if (reg_offset != 7)
2897                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2898
2899                 break;
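        /*
         * Polaris10: like Tonga, an 8-pipe ADDR_SURF_P8_32x32_16x16
         * pipe configuration.
         */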
2900         case CHIP_POLARIS10:
2901                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2904                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2905                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2912                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2916                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2920                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2935                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2937                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2944                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2947                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2952                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2967                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2972                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2975                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2996                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2998                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3002                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3010                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3019                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3021                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023
3024                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3025                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3026                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3027                                 NUM_BANKS(ADDR_SURF_16_BANK));
3028
3029                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032                                 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037                                 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042                                 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3046                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3047                                 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3051                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3052                                 NUM_BANKS(ADDR_SURF_16_BANK));
3053
3054                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057                                 NUM_BANKS(ADDR_SURF_16_BANK));
3058
3059                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3061                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3062                                 NUM_BANKS(ADDR_SURF_16_BANK));
3063
3064                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3066                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                 NUM_BANKS(ADDR_SURF_16_BANK));
3068
3069                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3071                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3072                                 NUM_BANKS(ADDR_SURF_16_BANK));
3073
3074                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3076                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077                                 NUM_BANKS(ADDR_SURF_16_BANK));
3078
3079                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3082                                 NUM_BANKS(ADDR_SURF_8_BANK));
3083
3084                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087                                 NUM_BANKS(ADDR_SURF_4_BANK));
3088
3089                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3092                                 NUM_BANKS(ADDR_SURF_4_BANK));
3093
3094                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3095                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3096
3097                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3098                         if (reg_offset != 7)
3099                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3100
3101                 break;
3102         case CHIP_STONEY:
3103                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3104                                 PIPE_CONFIG(ADDR_SURF_P2) |
3105                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3106                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3107                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P2) |
3109                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3110                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P2) |
3113                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P2) |
3117                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3118                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2));
3133                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2) |
3135                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3136                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3137                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3138                                  PIPE_CONFIG(ADDR_SURF_P2) |
3139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3145                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3146                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3149                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3150                                  PIPE_CONFIG(ADDR_SURF_P2) |
3151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3154                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3158                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3161                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3162                                  PIPE_CONFIG(ADDR_SURF_P2) |
3163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3165                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3178                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3182                                  PIPE_CONFIG(ADDR_SURF_P2) |
3183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3186                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3190                                  PIPE_CONFIG(ADDR_SURF_P2) |
3191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3194                                  PIPE_CONFIG(ADDR_SURF_P2) |
3195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3197                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3205
3206                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3207                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3208                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3209                                 NUM_BANKS(ADDR_SURF_8_BANK));
3210                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213                                 NUM_BANKS(ADDR_SURF_8_BANK));
3214                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3217                                 NUM_BANKS(ADDR_SURF_8_BANK));
3218                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221                                 NUM_BANKS(ADDR_SURF_8_BANK));
3222                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225                                 NUM_BANKS(ADDR_SURF_8_BANK));
3226                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229                                 NUM_BANKS(ADDR_SURF_8_BANK));
3230                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233                                 NUM_BANKS(ADDR_SURF_8_BANK));
3234                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3237                                 NUM_BANKS(ADDR_SURF_16_BANK));
3238                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241                                 NUM_BANKS(ADDR_SURF_16_BANK));
3242                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3243                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245                                  NUM_BANKS(ADDR_SURF_16_BANK));
3246                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3248                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249                                  NUM_BANKS(ADDR_SURF_16_BANK));
3250                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3251                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253                                  NUM_BANKS(ADDR_SURF_16_BANK));
3254                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3256                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257                                  NUM_BANKS(ADDR_SURF_16_BANK));
3258                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3261                                  NUM_BANKS(ADDR_SURF_8_BANK));
3262
3263                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3264                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3265                             reg_offset != 23)
3266                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3267
3268                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3269                         if (reg_offset != 7)
3270                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3271
3272                 break;
3273         default:
3274                 dev_warn(adev->dev,
3275                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3276                          adev->asic_type);
3277
        /* fall through */
3278         case CHIP_CARRIZO:
3279                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3280                                 PIPE_CONFIG(ADDR_SURF_P2) |
3281                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3282                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3283                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284                                 PIPE_CONFIG(ADDR_SURF_P2) |
3285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                 PIPE_CONFIG(ADDR_SURF_P2) |
3293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3300                                 PIPE_CONFIG(ADDR_SURF_P2) |
3301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3304                                 PIPE_CONFIG(ADDR_SURF_P2) |
3305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3308                                 PIPE_CONFIG(ADDR_SURF_P2));
3309                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2) |
3311                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3312                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3313                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3314                                  PIPE_CONFIG(ADDR_SURF_P2) |
3315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3318                                  PIPE_CONFIG(ADDR_SURF_P2) |
3319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3321                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3322                                  PIPE_CONFIG(ADDR_SURF_P2) |
3323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3325                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3326                                  PIPE_CONFIG(ADDR_SURF_P2) |
3327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3330                                  PIPE_CONFIG(ADDR_SURF_P2) |
3331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3334                                  PIPE_CONFIG(ADDR_SURF_P2) |
3335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3337                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3338                                  PIPE_CONFIG(ADDR_SURF_P2) |
3339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3341                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3354                                  PIPE_CONFIG(ADDR_SURF_P2) |
3355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3358                                  PIPE_CONFIG(ADDR_SURF_P2) |
3359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3362                                  PIPE_CONFIG(ADDR_SURF_P2) |
3363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3366                                  PIPE_CONFIG(ADDR_SURF_P2) |
3367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3370                                  PIPE_CONFIG(ADDR_SURF_P2) |
3371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3373                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3374                                  PIPE_CONFIG(ADDR_SURF_P2) |
3375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3378                                  PIPE_CONFIG(ADDR_SURF_P2) |
3379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3381
3382                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3384                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3385                                 NUM_BANKS(ADDR_SURF_8_BANK));
3386                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389                                 NUM_BANKS(ADDR_SURF_8_BANK));
3390                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3393                                 NUM_BANKS(ADDR_SURF_8_BANK));
3394                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397                                 NUM_BANKS(ADDR_SURF_8_BANK));
3398                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401                                 NUM_BANKS(ADDR_SURF_8_BANK));
3402                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405                                 NUM_BANKS(ADDR_SURF_8_BANK));
3406                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409                                 NUM_BANKS(ADDR_SURF_8_BANK));
3410                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3413                                 NUM_BANKS(ADDR_SURF_16_BANK));
3414                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                 NUM_BANKS(ADDR_SURF_16_BANK));
3418                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3419                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421                                  NUM_BANKS(ADDR_SURF_16_BANK));
3422                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3424                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425                                  NUM_BANKS(ADDR_SURF_16_BANK));
3426                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3427                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429                                  NUM_BANKS(ADDR_SURF_16_BANK));
3430                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3432                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433                                  NUM_BANKS(ADDR_SURF_16_BANK));
3434                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3437                                  NUM_BANKS(ADDR_SURF_8_BANK));
3438
3439                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3440                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3441                             reg_offset != 23)
3442                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3443
3444                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3445                         if (reg_offset != 7)
3446                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3447
3448                 break;
3449         }
3450 }
3451
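/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target the
 * given shader engine (se_num), shader array (sh_num) and instance.
 * Passing 0xffffffff for any of the arguments selects broadcast mode for
 * that field.
 */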
3452 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3453                                   u32 se_num, u32 sh_num, u32 instance)
3454 {
3455         u32 data;
3456
3457         if (instance == 0xffffffff)
3458                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3459         else
3460                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3461
3462         if (se_num == 0xffffffff)
3463                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3464         else
3465                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3466
3467         if (sh_num == 0xffffffff)
3468                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3469         else
3470                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3471
3472         WREG32(mmGRBM_GFX_INDEX, data);
3473 }
3474
3475 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3476 {
3477         return (u32)((1ULL << bit_width) - 1);
3478 }
3479
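/*
 * Return the bitmap of render backends that are not disabled (either in
 * hardware or by the user) for the SE/SH currently selected through
 * GRBM_GFX_INDEX.
 */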
3480 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3481 {
3482         u32 data, mask;
3483
3484         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3485                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3486
3487         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3488
3489         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3490                                        adev->gfx.config.max_sh_per_se);
3491
3492         return (~data) & mask;
3493 }
3494
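/* Per-ASIC default PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 field values. */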
3495 static void
3496 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3497 {
3498         switch (adev->asic_type) {
3499         case CHIP_FIJI:
3500                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3501                           RB_XSEL2(1) | PKR_MAP(2) |
3502                           PKR_XSEL(1) | PKR_YSEL(1) |
3503                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3504                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3505                            SE_PAIR_YSEL(2);
3506                 break;
3507         case CHIP_TONGA:
3508         case CHIP_POLARIS10:
3509                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3510                           SE_XSEL(1) | SE_YSEL(1);
3511                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3512                            SE_PAIR_YSEL(2);
3513                 break;
3514         case CHIP_TOPAZ:
3515         case CHIP_CARRIZO:
3516                 *rconf |= RB_MAP_PKR0(2);
3517                 *rconf1 |= 0x0;
3518                 break;
3519         case CHIP_POLARIS11:
3520                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521                           SE_XSEL(1) | SE_YSEL(1);
3522                 *rconf1 |= 0x0;
3523                 break;
3524         case CHIP_STONEY:
3525                 *rconf |= 0x0;
3526                 *rconf1 |= 0x0;
3527                 break;
3528         default:
3529                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530                 break;
3531         }
3532 }
3533
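/*
 * Rewrite the raster configuration for each shader engine when some render
 * backends are harvested, so that only the RBs present in rb_mask are
 * mapped; GRBM_GFX_INDEX is restored to broadcast mode afterwards.
 */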
3534 static void
3535 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536                                         u32 raster_config, u32 raster_config_1,
3537                                         unsigned rb_mask, unsigned num_rb)
3538 {
3539         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542         unsigned rb_per_se = num_rb / num_se;
3543         unsigned se_mask[4];
3544         unsigned se;
3545
3546         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3550
3551         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3554
3555         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556                              (!se_mask[2] && !se_mask[3]))) {
3557                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3558
3559                 if (!se_mask[0] && !se_mask[1]) {
3560                         raster_config_1 |=
3561                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3562                 } else {
3563                         raster_config_1 |=
3564                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3565                 }
3566         }
3567
3568         for (se = 0; se < num_se; se++) {
3569                 unsigned raster_config_se = raster_config;
3570                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572                 int idx = (se / 2) * 2;
3573
3574                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575                         raster_config_se &= ~SE_MAP_MASK;
3576
3577                         if (!se_mask[idx]) {
3578                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3579                         } else {
3580                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3581                         }
3582                 }
3583
3584                 pkr0_mask &= rb_mask;
3585                 pkr1_mask &= rb_mask;
3586                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587                         raster_config_se &= ~PKR_MAP_MASK;
3588
3589                         if (!pkr0_mask) {
3590                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3591                         } else {
3592                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3593                         }
3594                 }
3595
3596                 if (rb_per_se >= 2) {
3597                         unsigned rb0_mask = 1 << (se * rb_per_se);
3598                         unsigned rb1_mask = rb0_mask << 1;
3599
3600                         rb0_mask &= rb_mask;
3601                         rb1_mask &= rb_mask;
3602                         if (!rb0_mask || !rb1_mask) {
3603                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3604
3605                                 if (!rb0_mask) {
3606                                         raster_config_se |=
3607                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3608                                 } else {
3609                                         raster_config_se |=
3610                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3611                                 }
3612                         }
3613
3614                         if (rb_per_se > 2) {
3615                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616                                 rb1_mask = rb0_mask << 1;
3617                                 rb0_mask &= rb_mask;
3618                                 rb1_mask &= rb_mask;
3619                                 if (!rb0_mask || !rb1_mask) {
3620                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3621
3622                                         if (!rb0_mask) {
3623                                                 raster_config_se |=
3624                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3625                                         } else {
3626                                                 raster_config_se |=
3627                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3628                                         }
3629                                 }
3630                         }
3631                 }
3632
3633                 /* GRBM_GFX_INDEX has a different offset on VI */
3634                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3637         }
3638
3639         /* GRBM_GFX_INDEX has a different offset on VI */
3640         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3641 }
3642
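/*
 * Gather the active-RB bitmap across all SE/SH pairs and program the
 * raster configuration, taking the harvested path when some RBs are
 * disabled.
 */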
3643 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3644 {
3645         int i, j;
3646         u32 data;
3647         u32 raster_config = 0, raster_config_1 = 0;
3648         u32 active_rbs = 0;
3649         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650                                         adev->gfx.config.max_sh_per_se;
3651         unsigned num_rb_pipes;
3652
3653         mutex_lock(&adev->grbm_idx_mutex);
3654         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3658                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659                                                rb_bitmap_width_per_sh);
3660                 }
3661         }
3662         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663
3664         adev->gfx.config.backend_enable_mask = active_rbs;
3665         adev->gfx.config.num_rbs = hweight32(active_rbs);
3666
3667         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668                              adev->gfx.config.max_shader_engines, 16);
3669
3670         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3671
3672         if (!adev->gfx.config.backend_enable_mask ||
3673                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3674                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3676         } else {
3677                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678                                                         adev->gfx.config.backend_enable_mask,
3679                                                         num_rb_pipes);
3680         }
3681
3682         mutex_unlock(&adev->grbm_idx_mutex);
3683 }
3684
3685 /**
3686  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3687  *
3688  * @adev: amdgpu_device pointer
3689  *
3690  * Initialize the SH_MEM config and aperture registers for the compute VMIDs (8-15).
3691  *
3692  */
3693 #define DEFAULT_SH_MEM_BASES    (0x6000)
3694 #define FIRST_COMPUTE_VMID      (8)
3695 #define LAST_COMPUTE_VMID       (16)
3696 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3697 {
3698         int i;
3699         uint32_t sh_mem_config;
3700         uint32_t sh_mem_bases;
3701
3702         /*
3703          * Configure apertures:
3704          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3705          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3706          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3707          */
3708         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3709
3710         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3711                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3712                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3713                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3714                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3715                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3716
3717         mutex_lock(&adev->srbm_mutex);
3718         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3719                 vi_srbm_select(adev, 0, 0, 0, i);
3720                 /* CP and shaders */
3721                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3722                 WREG32(mmSH_MEM_APE1_BASE, 1);
3723                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3724                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3725         }
3726         vi_srbm_select(adev, 0, 0, 0, 0);
3727         mutex_unlock(&adev->srbm_mutex);
3728 }
3729
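/*
 * One-time GFX block setup: address/tiling configuration, RB raster
 * config, CU info, SH_MEM apertures for all 16 VMIDs (including the
 * compute VMIDs) and the PA_SC FIFO sizes.
 */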
3730 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3731 {
3732         u32 tmp;
3733         int i;
3734
3735         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3736         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3737         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3738         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3739
3740         gfx_v8_0_tiling_mode_table_init(adev);
3741         gfx_v8_0_setup_rb(adev);
3742         gfx_v8_0_get_cu_info(adev);
3743
3744         /* XXX SH_MEM regs */
3745         /* where to put LDS, scratch, GPUVM in FSA64 space */
3746         mutex_lock(&adev->srbm_mutex);
3747         for (i = 0; i < 16; i++) {
3748                 vi_srbm_select(adev, 0, 0, 0, i);
3749                 /* CP and shaders */
3750                 if (i == 0) {
3751                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3752                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3753                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3754                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3755                         WREG32(mmSH_MEM_CONFIG, tmp);
3756                 } else {
3757                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3758                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3759                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3760                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3761                         WREG32(mmSH_MEM_CONFIG, tmp);
3762                 }
3763
3764                 WREG32(mmSH_MEM_APE1_BASE, 1);
3765                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3766                 WREG32(mmSH_MEM_BASES, 0);
3767         }
3768         vi_srbm_select(adev, 0, 0, 0, 0);
3769         mutex_unlock(&adev->srbm_mutex);
3770
3771         gfx_v8_0_init_compute_vmid(adev);
3772
3773         mutex_lock(&adev->grbm_idx_mutex);
3774         /*
3775          * make sure that the following register writes are broadcast
3776          * to all of the shaders
3777          */
3778         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3779
3780         WREG32(mmPA_SC_FIFO_SIZE,
3781                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3782                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3783                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3784                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3785                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3786                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3787                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3788                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3789         mutex_unlock(&adev->grbm_idx_mutex);
3790
3791 }
3792
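/*
 * Wait (up to adev->usec_timeout) for the RLC serdes CU masters of every
 * SE/SH, and then the non-CU masters, to report idle.
 */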
3793 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3794 {
3795         u32 i, j, k;
3796         u32 mask;
3797
3798         mutex_lock(&adev->grbm_idx_mutex);
3799         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3800                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3801                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3802                         for (k = 0; k < adev->usec_timeout; k++) {
3803                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3804                                         break;
3805                                 udelay(1);
3806                         }
3807                 }
3808         }
3809         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3810         mutex_unlock(&adev->grbm_idx_mutex);
3811
3812         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3813                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3814                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3815                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3816         for (k = 0; k < adev->usec_timeout; k++) {
3817                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3818                         break;
3819                 udelay(1);
3820         }
3821 }
3822
3823 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3824                                                bool enable)
3825 {
3826         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3827
3828         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3829         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3830         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3831         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3832
3833         WREG32(mmCP_INT_CNTL_RING0, tmp);
3834 }
3835
3836 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3837 {
3838         /* csib */
3839         WREG32(mmRLC_CSIB_ADDR_HI,
3840                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3841         WREG32(mmRLC_CSIB_ADDR_LO,
3842                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3843         WREG32(mmRLC_CSIB_LENGTH,
3844                         adev->gfx.rlc.clear_state_size);
3845 }
3846
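/*
 * Walk the RLC indirect register list: record the offset at which each
 * entry starts and replace every index register with its position in
 * unique_indices[], growing that table as new index registers are found.
 */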
3847 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3848                                 int ind_offset,
3849                                 int list_size,
3850                                 int *unique_indices,
3851                                 int *indices_count,
3852                                 int max_indices,
3853                                 int *ind_start_offsets,
3854                                 int *offset_count,
3855                                 int max_offset)
3856 {
3857         int indices;
3858         bool new_entry = true;
3859
3860         for (; ind_offset < list_size; ind_offset++) {
3861
3862                 if (new_entry) {
3863                         new_entry = false;
3864                         ind_start_offsets[*offset_count] = ind_offset;
3865                         *offset_count = *offset_count + 1;
3866                         BUG_ON(*offset_count >= max_offset);
3867                 }
3868
3869                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3870                         new_entry = true;
3871                         continue;
3872                 }
3873
3874                 ind_offset += 2;
3875
3876                 /* look for the matching index */
3877                 for (indices = 0;
3878                         indices < *indices_count;
3879                         indices++) {
3880                         if (unique_indices[indices] ==
3881                                 register_list_format[ind_offset])
3882                                 break;
3883                 }
3884
3885                 if (indices >= *indices_count) {
3886                         unique_indices[*indices_count] =
3887                                 register_list_format[ind_offset];
3888                         indices = *indices_count;
3889                         *indices_count = *indices_count + 1;
3890                         BUG_ON(*indices_count >= max_indices);
3891                 }
3892
3893                 register_list_format[ind_offset] = indices;
3894         }
3895 }
3896
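/*
 * Upload the RLC save/restore lists: the direct register list goes to
 * ARAM, the indirect list (with indices compacted by
 * gfx_v8_0_parse_ind_reg_list()) and its start offsets go to GPM scratch,
 * and the unique index registers are written to the SRM index CNTL
 * address/data register pairs.
 */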
3897 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3898 {
3899         int i, temp, data;
3900         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3901         int indices_count = 0;
3902         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3903         int offset_count = 0;
3904
3905         int list_size;
3906         unsigned int *register_list_format =
3907                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3908         if (register_list_format == NULL)
3909                 return -ENOMEM;
3910         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3911                         adev->gfx.rlc.reg_list_format_size_bytes);
3912
3913         gfx_v8_0_parse_ind_reg_list(register_list_format,
3914                                 RLC_FormatDirectRegListLength,
3915                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3916                                 unique_indices,
3917                                 &indices_count,
3918                                 sizeof(unique_indices) / sizeof(int),
3919                                 indirect_start_offsets,
3920                                 &offset_count,
3921                                 sizeof(indirect_start_offsets)/sizeof(int));
3922
3923         /* save and restore list */
3924         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3925
3926         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3927         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3928                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3929
3930         /* indirect list */
3931         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3932         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3933                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3934
3935         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3936         list_size = list_size >> 1;
3937         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3938         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3939
3940         /* starting offsets */
3941         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3942                 adev->gfx.rlc.starting_offsets_start);
3943         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3944                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3945                                 indirect_start_offsets[i]);
3946
3947         /* unique indices */
3948         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3949         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3950         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3951                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3952                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3953         }
3954         kfree(register_list_format);
3955
3956         return 0;
3957 }
3958
3959 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3960 {
3961         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3962 }
3963
3964 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3965 {
3966         uint32_t data;
3967
3968         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3969                               AMD_PG_SUPPORT_GFX_SMG |
3970                               AMD_PG_SUPPORT_GFX_DMG)) {
3971                 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3972
3973                 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3974                 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3975                 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3976                 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3977                 WREG32(mmRLC_PG_DELAY, data);
3978
3979                 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3980                 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3981         }
3982 }
3983
3984 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3985                                                 bool enable)
3986 {
3987         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3988 }
3989
3990 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3991                                                   bool enable)
3992 {
3993         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3994 }
3995
3996 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3997 {
3998         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3999 }
4000
4001 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4002 {
4003         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4004                               AMD_PG_SUPPORT_GFX_SMG |
4005                               AMD_PG_SUPPORT_GFX_DMG |
4006                               AMD_PG_SUPPORT_CP |
4007                               AMD_PG_SUPPORT_GDS |
4008                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
4009                 gfx_v8_0_init_csb(adev);
4010                 gfx_v8_0_init_save_restore_list(adev);
4011                 gfx_v8_0_enable_save_restore_machine(adev);
4012
4013                 if ((adev->asic_type == CHIP_CARRIZO) ||
4014                     (adev->asic_type == CHIP_STONEY)) {
4015                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4016                         gfx_v8_0_init_power_gating(adev);
4017                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4018                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4019                                 cz_enable_sck_slow_down_on_power_up(adev, true);
4020                                 cz_enable_sck_slow_down_on_power_down(adev, true);
4021                         } else {
4022                                 cz_enable_sck_slow_down_on_power_up(adev, false);
4023                                 cz_enable_sck_slow_down_on_power_down(adev, false);
4024                         }
4025                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4026                                 cz_enable_cp_power_gating(adev, true);
4027                         else
4028                                 cz_enable_cp_power_gating(adev, false);
4029                 } else if (adev->asic_type == CHIP_POLARIS11) {
4030                         gfx_v8_0_init_power_gating(adev);
4031                 }
4032         }
4033 }
4034
4035 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4036 {
4037         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4038
4039         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4040         gfx_v8_0_wait_for_rlc_serdes(adev);
4041 }
4042
4043 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4044 {
4045         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4046         udelay(50);
4047
4048         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4049         udelay(50);
4050 }
4051
4052 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4053 {
4054         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4055
4056         /* on APUs such as carrizo, the CP interrupts are enabled after the CP is initialized */
4057         if (!(adev->flags & AMD_IS_APU))
4058                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4059
4060         udelay(50);
4061 }
4062
4063 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4064 {
4065         const struct rlc_firmware_header_v2_0 *hdr;
4066         const __le32 *fw_data;
4067         unsigned i, fw_size;
4068
4069         if (!adev->gfx.rlc_fw)
4070                 return -EINVAL;
4071
4072         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4073         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4074
4075         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4076                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4077         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4078
4079         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4080         for (i = 0; i < fw_size; i++)
4081                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4082         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4083
4084         return 0;
4085 }
4086
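/*
 * Restart the RLC: stop it, disable CGCG/CGLS, reset the RLC, re-init the
 * power-gating setup and, on the legacy (non-SMU) path, reload the RLC
 * microcode before starting it again.
 */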
4087 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4088 {
4089         int r;
4090         u32 tmp;
4091
4092         gfx_v8_0_rlc_stop(adev);
4093
4094         /* disable CG */
4095         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4096         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4097                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4098         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4099         if (adev->asic_type == CHIP_POLARIS11 ||
4100             adev->asic_type == CHIP_POLARIS10) {
4101                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4102                 tmp &= ~0x3;
4103                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4104         }
4105
4106         /* disable PG */
4107         WREG32(mmRLC_PG_CNTL, 0);
4108
4109         gfx_v8_0_rlc_reset(adev);
4110         gfx_v8_0_init_pg(adev);
4111
4112         if (!adev->pp_enabled) {
4113                 if (!adev->firmware.smu_load) {
4114                         /* legacy rlc firmware loading */
4115                         r = gfx_v8_0_rlc_load_microcode(adev);
4116                         if (r)
4117                                 return r;
4118                 } else {
4119                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4120                                                         AMDGPU_UCODE_ID_RLC_G);
4121                         if (r)
4122                                 return -EINVAL;
4123                 }
4124         }
4125
4126         gfx_v8_0_rlc_start(adev);
4127
4128         return 0;
4129 }
4130
4131 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4132 {
4133         int i;
4134         u32 tmp = RREG32(mmCP_ME_CNTL);
4135
4136         if (enable) {
4137                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4138                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4139                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4140         } else {
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4142                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4143                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4144                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4145                         adev->gfx.gfx_ring[i].ready = false;
4146         }
4147         WREG32(mmCP_ME_CNTL, tmp);
4148         udelay(50);
4149 }
4150
4151 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4152 {
4153         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4154         const struct gfx_firmware_header_v1_0 *ce_hdr;
4155         const struct gfx_firmware_header_v1_0 *me_hdr;
4156         const __le32 *fw_data;
4157         unsigned i, fw_size;
4158
4159         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4160                 return -EINVAL;
4161
4162         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4163                 adev->gfx.pfp_fw->data;
4164         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4165                 adev->gfx.ce_fw->data;
4166         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4167                 adev->gfx.me_fw->data;
4168
4169         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4170         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4171         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4172
4173         gfx_v8_0_cp_gfx_enable(adev, false);
4174
4175         /* PFP */
4176         fw_data = (const __le32 *)
4177                 (adev->gfx.pfp_fw->data +
4178                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4179         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4180         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4181         for (i = 0; i < fw_size; i++)
4182                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4183         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4184
4185         /* CE */
4186         fw_data = (const __le32 *)
4187                 (adev->gfx.ce_fw->data +
4188                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4189         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4190         WREG32(mmCP_CE_UCODE_ADDR, 0);
4191         for (i = 0; i < fw_size; i++)
4192                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4193         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4194
4195         /* ME */
4196         fw_data = (const __le32 *)
4197                 (adev->gfx.me_fw->data +
4198                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4199         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4200         WREG32(mmCP_ME_RAM_WADDR, 0);
4201         for (i = 0; i < fw_size; i++)
4202                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4203         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4204
4205         return 0;
4206 }
4207
4208 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4209 {
4210         u32 count = 0;
4211         const struct cs_section_def *sect = NULL;
4212         const struct cs_extent_def *ext = NULL;
4213
4214         /* begin clear state */
4215         count += 2;
4216         /* context control state */
4217         count += 3;
4218
4219         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4220                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4221                         if (sect->id == SECT_CONTEXT)
4222                                 count += 2 + ext->reg_count;
4223                         else
4224                                 return 0;
4225                 }
4226         }
4227         /* pa_sc_raster_config/pa_sc_raster_config1 */
4228         count += 4;
4229         /* end clear state */
4230         count += 2;
4231         /* clear state */
4232         count += 2;
4233
4234         return count;
4235 }
4236
4237 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4238 {
4239         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4240         const struct cs_section_def *sect = NULL;
4241         const struct cs_extent_def *ext = NULL;
4242         int r, i;
4243
4244         /* init the CP */
4245         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4246         WREG32(mmCP_ENDIAN_SWAP, 0);
4247         WREG32(mmCP_DEVICE_ID, 1);
4248
4249         gfx_v8_0_cp_gfx_enable(adev, true);
4250
4251         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4252         if (r) {
4253                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4254                 return r;
4255         }
4256
4257         /* clear state buffer */
4258         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4259         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4260
4261         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4262         amdgpu_ring_write(ring, 0x80000000);
4263         amdgpu_ring_write(ring, 0x80000000);
4264
4265         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4266                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4267                         if (sect->id == SECT_CONTEXT) {
4268                                 amdgpu_ring_write(ring,
4269                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4270                                                ext->reg_count));
4271                                 amdgpu_ring_write(ring,
4272                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4273                                 for (i = 0; i < ext->reg_count; i++)
4274                                         amdgpu_ring_write(ring, ext->extent[i]);
4275                         }
4276                 }
4277         }
4278
4279         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4280         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4281         switch (adev->asic_type) {
4282         case CHIP_TONGA:
4283         case CHIP_POLARIS10:
4284                 amdgpu_ring_write(ring, 0x16000012);
4285                 amdgpu_ring_write(ring, 0x0000002A);
4286                 break;
4287         case CHIP_POLARIS11:
4288                 amdgpu_ring_write(ring, 0x16000012);
4289                 amdgpu_ring_write(ring, 0x00000000);
4290                 break;
4291         case CHIP_FIJI:
4292                 amdgpu_ring_write(ring, 0x3a00161a);
4293                 amdgpu_ring_write(ring, 0x0000002e);
4294                 break;
4295         case CHIP_CARRIZO:
4296                 amdgpu_ring_write(ring, 0x00000002);
4297                 amdgpu_ring_write(ring, 0x00000000);
4298                 break;
4299         case CHIP_TOPAZ:
4300                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4301                                 0x00000000 : 0x00000002);
4302                 amdgpu_ring_write(ring, 0x00000000);
4303                 break;
4304         case CHIP_STONEY:
4305                 amdgpu_ring_write(ring, 0x00000000);
4306                 amdgpu_ring_write(ring, 0x00000000);
4307                 break;
4308         default:
4309                 BUG();
4310         }
4311
4312         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4313         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4314
4315         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4316         amdgpu_ring_write(ring, 0);
4317
4318         /* init the CE partitions */
4319         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4320         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4321         amdgpu_ring_write(ring, 0x8000);
4322         amdgpu_ring_write(ring, 0x8000);
4323
4324         amdgpu_ring_commit(ring);
4325
4326         return 0;
4327 }
4328
4329 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4330 {
4331         struct amdgpu_ring *ring;
4332         u32 tmp;
4333         u32 rb_bufsz;
4334         u64 rb_addr, rptr_addr;
4335         int r;
4336
4337         /* Set the write pointer delay */
4338         WREG32(mmCP_RB_WPTR_DELAY, 0);
4339
4340         /* set the RB to use vmid 0 */
4341         WREG32(mmCP_RB_VMID, 0);
4342
4343         /* Set ring buffer size */
4344         ring = &adev->gfx.gfx_ring[0];
4345         rb_bufsz = order_base_2(ring->ring_size / 8);
4346         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4347         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4348         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4349         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4350 #ifdef __BIG_ENDIAN
4351         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4352 #endif
4353         WREG32(mmCP_RB0_CNTL, tmp);
4354
4355         /* Initialize the ring buffer's read and write pointers */
4356         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4357         ring->wptr = 0;
4358         WREG32(mmCP_RB0_WPTR, ring->wptr);
4359
4360                 /* set the wb address whether it's enabled or not */
4361         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4362         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4363         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4364
4365         mdelay(1);
4366         WREG32(mmCP_RB0_CNTL, tmp);
4367
4368         rb_addr = ring->gpu_addr >> 8;
4369         WREG32(mmCP_RB0_BASE, rb_addr);
4370         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4371
4372         /* no gfx doorbells on iceland */
4373         if (adev->asic_type != CHIP_TOPAZ) {
4374                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4375                 if (ring->use_doorbell) {
4376                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4377                                             DOORBELL_OFFSET, ring->doorbell_index);
4378                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4379                                             DOORBELL_HIT, 0);
4380                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4381                                             DOORBELL_EN, 1);
4382                 } else {
4383                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4384                                             DOORBELL_EN, 0);
4385                 }
4386                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4387
4388                 if (adev->asic_type == CHIP_TONGA) {
4389                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4390                                             DOORBELL_RANGE_LOWER,
4391                                             AMDGPU_DOORBELL_GFX_RING0);
4392                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4393
4394                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4395                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4396                 }
4397
4398         }
4399
4400         /* start the ring */
4401         gfx_v8_0_cp_gfx_start(adev);
4402         ring->ready = true;
4403         r = amdgpu_ring_test_ring(ring);
4404         if (r)
4405                 ring->ready = false;
4406
4407         return r;
4408 }
4409
4410 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4411 {
4412         int i;
4413
4414         if (enable) {
4415                 WREG32(mmCP_MEC_CNTL, 0);
4416         } else {
4417                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4418                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4419                         adev->gfx.compute_ring[i].ready = false;
4420         }
4421         udelay(50);
4422 }
4423
4424 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4425 {
4426         const struct gfx_firmware_header_v1_0 *mec_hdr;
4427         const __le32 *fw_data;
4428         unsigned i, fw_size;
4429
4430         if (!adev->gfx.mec_fw)
4431                 return -EINVAL;
4432
4433         gfx_v8_0_cp_compute_enable(adev, false);
4434
4435         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4436         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4437
4438         fw_data = (const __le32 *)
4439                 (adev->gfx.mec_fw->data +
4440                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4441         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4442
4443         /* MEC1 */
4444         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4445         for (i = 0; i < fw_size; i++)
4446                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4447         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4448
4449         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4450         if (adev->gfx.mec2_fw) {
4451                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4452
4453                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4454                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4455
4456                 fw_data = (const __le32 *)
4457                         (adev->gfx.mec2_fw->data +
4458                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4459                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4460
4461                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4462                 for (i = 0; i < fw_size; i++)
4463                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4464                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4465         }
4466
4467         return 0;
4468 }
4469
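     /* Memory queue descriptor (MQD) for a VI compute queue.  The layout mirrors
      * the CP_HQD_ / CP_MQD_ register state the CP uses to manage a hardware
      * queue; the "ordinal" comments give each field's dword position, and the
      * trailing reserved_t[] block is scratch space owned by the CP microcode.
      */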
4470 struct vi_mqd {
4471         uint32_t header;  /* ordinal0 */
4472         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4473         uint32_t compute_dim_x;  /* ordinal2 */
4474         uint32_t compute_dim_y;  /* ordinal3 */
4475         uint32_t compute_dim_z;  /* ordinal4 */
4476         uint32_t compute_start_x;  /* ordinal5 */
4477         uint32_t compute_start_y;  /* ordinal6 */
4478         uint32_t compute_start_z;  /* ordinal7 */
4479         uint32_t compute_num_thread_x;  /* ordinal8 */
4480         uint32_t compute_num_thread_y;  /* ordinal9 */
4481         uint32_t compute_num_thread_z;  /* ordinal10 */
4482         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4483         uint32_t compute_perfcount_enable;  /* ordinal12 */
4484         uint32_t compute_pgm_lo;  /* ordinal13 */
4485         uint32_t compute_pgm_hi;  /* ordinal14 */
4486         uint32_t compute_tba_lo;  /* ordinal15 */
4487         uint32_t compute_tba_hi;  /* ordinal16 */
4488         uint32_t compute_tma_lo;  /* ordinal17 */
4489         uint32_t compute_tma_hi;  /* ordinal18 */
4490         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4491         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4492         uint32_t compute_vmid;  /* ordinal21 */
4493         uint32_t compute_resource_limits;  /* ordinal22 */
4494         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4495         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4496         uint32_t compute_tmpring_size;  /* ordinal25 */
4497         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4498         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4499         uint32_t compute_restart_x;  /* ordinal28 */
4500         uint32_t compute_restart_y;  /* ordinal29 */
4501         uint32_t compute_restart_z;  /* ordinal30 */
4502         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4503         uint32_t compute_misc_reserved;  /* ordinal32 */
4504         uint32_t compute_dispatch_id;  /* ordinal33 */
4505         uint32_t compute_threadgroup_id;  /* ordinal34 */
4506         uint32_t compute_relaunch;  /* ordinal35 */
4507         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4508         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4509         uint32_t compute_wave_restore_control;  /* ordinal38 */
4510         uint32_t reserved9;  /* ordinal39 */
4511         uint32_t reserved10;  /* ordinal40 */
4512         uint32_t reserved11;  /* ordinal41 */
4513         uint32_t reserved12;  /* ordinal42 */
4514         uint32_t reserved13;  /* ordinal43 */
4515         uint32_t reserved14;  /* ordinal44 */
4516         uint32_t reserved15;  /* ordinal45 */
4517         uint32_t reserved16;  /* ordinal46 */
4518         uint32_t reserved17;  /* ordinal47 */
4519         uint32_t reserved18;  /* ordinal48 */
4520         uint32_t reserved19;  /* ordinal49 */
4521         uint32_t reserved20;  /* ordinal50 */
4522         uint32_t reserved21;  /* ordinal51 */
4523         uint32_t reserved22;  /* ordinal52 */
4524         uint32_t reserved23;  /* ordinal53 */
4525         uint32_t reserved24;  /* ordinal54 */
4526         uint32_t reserved25;  /* ordinal55 */
4527         uint32_t reserved26;  /* ordinal56 */
4528         uint32_t reserved27;  /* ordinal57 */
4529         uint32_t reserved28;  /* ordinal58 */
4530         uint32_t reserved29;  /* ordinal59 */
4531         uint32_t reserved30;  /* ordinal60 */
4532         uint32_t reserved31;  /* ordinal61 */
4533         uint32_t reserved32;  /* ordinal62 */
4534         uint32_t reserved33;  /* ordinal63 */
4535         uint32_t reserved34;  /* ordinal64 */
4536         uint32_t compute_user_data_0;  /* ordinal65 */
4537         uint32_t compute_user_data_1;  /* ordinal66 */
4538         uint32_t compute_user_data_2;  /* ordinal67 */
4539         uint32_t compute_user_data_3;  /* ordinal68 */
4540         uint32_t compute_user_data_4;  /* ordinal69 */
4541         uint32_t compute_user_data_5;  /* ordinal70 */
4542         uint32_t compute_user_data_6;  /* ordinal71 */
4543         uint32_t compute_user_data_7;  /* ordinal72 */
4544         uint32_t compute_user_data_8;  /* ordinal73 */
4545         uint32_t compute_user_data_9;  /* ordinal74 */
4546         uint32_t compute_user_data_10;  /* ordinal75 */
4547         uint32_t compute_user_data_11;  /* ordinal76 */
4548         uint32_t compute_user_data_12;  /* ordinal77 */
4549         uint32_t compute_user_data_13;  /* ordinal78 */
4550         uint32_t compute_user_data_14;  /* ordinal79 */
4551         uint32_t compute_user_data_15;  /* ordinal80 */
4552         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4553         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4554         uint32_t reserved35;  /* ordinal83 */
4555         uint32_t reserved36;  /* ordinal84 */
4556         uint32_t reserved37;  /* ordinal85 */
4557         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4558         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4559         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4560         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4561         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4562         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4563         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4564         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4565         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4566         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4567         uint32_t reserved38;  /* ordinal96 */
4568         uint32_t reserved39;  /* ordinal97 */
4569         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4570         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4571         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4572         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4573         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4574         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4575         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4576         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4577         uint32_t reserved40;  /* ordinal106 */
4578         uint32_t reserved41;  /* ordinal107 */
4579         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4580         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4581         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4582         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4583         uint32_t reserved42;  /* ordinal112 */
4584         uint32_t reserved43;  /* ordinal113 */
4585         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4586         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4587         uint32_t cp_packet_id_lo;  /* ordinal116 */
4588         uint32_t cp_packet_id_hi;  /* ordinal117 */
4589         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4590         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4591         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4592         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4593         uint32_t gds_save_mask_lo;  /* ordinal122 */
4594         uint32_t gds_save_mask_hi;  /* ordinal123 */
4595         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4596         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4597         uint32_t reserved44;  /* ordinal126 */
4598         uint32_t reserved45;  /* ordinal127 */
4599         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4600         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4601         uint32_t cp_hqd_active;  /* ordinal130 */
4602         uint32_t cp_hqd_vmid;  /* ordinal131 */
4603         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4604         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4605         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4606         uint32_t cp_hqd_quantum;  /* ordinal135 */
4607         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4608         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4609         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4610         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4611         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4612         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4613         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4614         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4615         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4616         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4617         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4618         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4619         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4620         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4621         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4622         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4623         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4624         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4625         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4626         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4627         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4628         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4629         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4630         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4631         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4632         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4633         uint32_t cp_mqd_control;  /* ordinal162 */
4634         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4635         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4636         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4637         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4638         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4639         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4640         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4641         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4642         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4643         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4644         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4645         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4646         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4647         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4648         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4649         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4650         uint32_t cp_hqd_error;  /* ordinal179 */
4651         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4652         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4653         uint32_t reserved46;  /* ordinal182 */
4654         uint32_t reserved47;  /* ordinal183 */
4655         uint32_t reserved48;  /* ordinal184 */
4656         uint32_t reserved49;  /* ordinal185 */
4657         uint32_t reserved50;  /* ordinal186 */
4658         uint32_t reserved51;  /* ordinal187 */
4659         uint32_t reserved52;  /* ordinal188 */
4660         uint32_t reserved53;  /* ordinal189 */
4661         uint32_t reserved54;  /* ordinal190 */
4662         uint32_t reserved55;  /* ordinal191 */
4663         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4664         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4665         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4666         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4667         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4668         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4669         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4670         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4671         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4672         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4673         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4674         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4675         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4676         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4677         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4678         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4679         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4680         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4681         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4682         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4683         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4684         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4685         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4686         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4687         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4688         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4689         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4690         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4691         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4692         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4693         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4694         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4695         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4696         uint32_t reserved56;  /* ordinal225 */
4697         uint32_t reserved57;  /* ordinal226 */
4698         uint32_t reserved58;  /* ordinal227 */
4699         uint32_t set_resources_header;  /* ordinal228 */
4700         uint32_t set_resources_dw1;  /* ordinal229 */
4701         uint32_t set_resources_dw2;  /* ordinal230 */
4702         uint32_t set_resources_dw3;  /* ordinal231 */
4703         uint32_t set_resources_dw4;  /* ordinal232 */
4704         uint32_t set_resources_dw5;  /* ordinal233 */
4705         uint32_t set_resources_dw6;  /* ordinal234 */
4706         uint32_t set_resources_dw7;  /* ordinal235 */
4707         uint32_t reserved59;  /* ordinal236 */
4708         uint32_t reserved60;  /* ordinal237 */
4709         uint32_t reserved61;  /* ordinal238 */
4710         uint32_t reserved62;  /* ordinal239 */
4711         uint32_t reserved63;  /* ordinal240 */
4712         uint32_t reserved64;  /* ordinal241 */
4713         uint32_t reserved65;  /* ordinal242 */
4714         uint32_t reserved66;  /* ordinal243 */
4715         uint32_t reserved67;  /* ordinal244 */
4716         uint32_t reserved68;  /* ordinal245 */
4717         uint32_t reserved69;  /* ordinal246 */
4718         uint32_t reserved70;  /* ordinal247 */
4719         uint32_t reserved71;  /* ordinal248 */
4720         uint32_t reserved72;  /* ordinal249 */
4721         uint32_t reserved73;  /* ordinal250 */
4722         uint32_t reserved74;  /* ordinal251 */
4723         uint32_t reserved75;  /* ordinal252 */
4724         uint32_t reserved76;  /* ordinal253 */
4725         uint32_t reserved77;  /* ordinal254 */
4726         uint32_t reserved78;  /* ordinal255 */
4727
4728         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4729 };
4730
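     /* Unpin and free the per-ring MQD buffer objects allocated in
      * gfx_v8_0_cp_compute_resume().
      */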
4731 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4732 {
4733         int i, r;
4734
4735         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4736                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4737
4738                 if (ring->mqd_obj) {
4739                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4740                         if (unlikely(r != 0))
4741                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4742
4743                         amdgpu_bo_unpin(ring->mqd_obj);
4744                         amdgpu_bo_unreserve(ring->mqd_obj);
4745
4746                         amdgpu_bo_unref(&ring->mqd_obj);
4747                         ring->mqd_obj = NULL;
4748                 }
4749         }
4750 }
4751
4752 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4753 {
4754         int r, i, j;
4755         u32 tmp;
4756         bool use_doorbell = true;
4757         u64 hqd_gpu_addr;
4758         u64 mqd_gpu_addr;
4759         u64 eop_gpu_addr;
4760         u64 wb_gpu_addr;
4761         u32 *buf;
4762         struct vi_mqd *mqd;
4763
4764         /* init the pipes */
4765         mutex_lock(&adev->srbm_mutex);
4766         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4767                 int me = (i < 4) ? 1 : 2;
4768                 int pipe = (i < 4) ? i : (i - 4);
4769
4770                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4771                 eop_gpu_addr >>= 8;
4772
4773                 vi_srbm_select(adev, me, pipe, 0, 0);
4774
4775                 /* write the EOP addr */
4776                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4777                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4778
4779                 /* set the VMID assigned */
4780                 WREG32(mmCP_HQD_VMID, 0);
4781
4782                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4783                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4784                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4785                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4786                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4787         }
4788         vi_srbm_select(adev, 0, 0, 0, 0);
4789         mutex_unlock(&adev->srbm_mutex);
4790
4791         /* init the compute queues */
4792         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4793                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4794
4795                 if (ring->mqd_obj == NULL) {
4796                         r = amdgpu_bo_create(adev,
4797                                              sizeof(struct vi_mqd),
4798                                              PAGE_SIZE, true,
4799                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4800                                              NULL, &ring->mqd_obj);
4801                         if (r) {
4802                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4803                                 return r;
4804                         }
4805                 }
4806
4807                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4808                 if (unlikely(r != 0)) {
4809                         gfx_v8_0_cp_compute_fini(adev);
4810                         return r;
4811                 }
4812                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4813                                   &mqd_gpu_addr);
4814                 if (r) {
4815                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4816                         gfx_v8_0_cp_compute_fini(adev);
4817                         return r;
4818                 }
4819                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4820                 if (r) {
4821                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4822                         gfx_v8_0_cp_compute_fini(adev);
4823                         return r;
4824                 }
4825
4826                 /* init the mqd struct */
4827                 memset(buf, 0, sizeof(struct vi_mqd));
4828
4829                 mqd = (struct vi_mqd *)buf;
4830                 mqd->header = 0xC0310800;
4831                 mqd->compute_pipelinestat_enable = 0x00000001;
4832                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4833                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4834                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4835                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4836                 mqd->compute_misc_reserved = 0x00000003;
4837
4838                 mutex_lock(&adev->srbm_mutex);
4839                 vi_srbm_select(adev, ring->me,
4840                                ring->pipe,
4841                                ring->queue, 0);
4842
4843                 /* disable wptr polling */
4844                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4845                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4846                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4847
4848                 mqd->cp_hqd_eop_base_addr_lo =
4849                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4850                 mqd->cp_hqd_eop_base_addr_hi =
4851                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4852
4853                 /* enable doorbell? */
4854                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4855                 if (use_doorbell) {
4856                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4857                 } else {
4858                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4859                 }
4860                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4861                 mqd->cp_hqd_pq_doorbell_control = tmp;
4862
4863                 /* disable the queue if it's active */
4864                 mqd->cp_hqd_dequeue_request = 0;
4865                 mqd->cp_hqd_pq_rptr = 0;
4866                 mqd->cp_hqd_pq_wptr = 0;
4867                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4868                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4869                         for (j = 0; j < adev->usec_timeout; j++) {
4870                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4871                                         break;
4872                                 udelay(1);
4873                         }
4874                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4875                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4876                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4877                 }
4878
4879                 /* set the pointer to the MQD */
4880                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4881                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4882                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4883                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4884
4885                 /* set MQD vmid to 0 */
4886                 tmp = RREG32(mmCP_MQD_CONTROL);
4887                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4888                 WREG32(mmCP_MQD_CONTROL, tmp);
4889                 mqd->cp_mqd_control = tmp;
4890
4891                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4892                 hqd_gpu_addr = ring->gpu_addr >> 8;
4893                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4894                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4895                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4896                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4897
4898                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4899                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4900                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4901                                     (order_base_2(ring->ring_size / 4) - 1));
4902                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4903                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4904 #ifdef __BIG_ENDIAN
4905                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4906 #endif
4907                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4908                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4909                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4910                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4911                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4912                 mqd->cp_hqd_pq_control = tmp;
4913
4914                 /* set the wb address whether it's enabled or not */
4915                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4916                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4917                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4918                         upper_32_bits(wb_gpu_addr) & 0xffff;
4919                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4920                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4921                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4922                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4923
4924                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4925                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4926                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4927                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4928                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4929                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4930                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4931
4932                 /* enable the doorbell if requested */
4933                 if (use_doorbell) {
4934                         if ((adev->asic_type == CHIP_CARRIZO) ||
4935                             (adev->asic_type == CHIP_FIJI) ||
4936                             (adev->asic_type == CHIP_STONEY) ||
4937                             (adev->asic_type == CHIP_POLARIS11) ||
4938                             (adev->asic_type == CHIP_POLARIS10)) {
4939                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4940                                        AMDGPU_DOORBELL_KIQ << 2);
4941                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4942                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4943                         }
4944                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4945                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4946                                             DOORBELL_OFFSET, ring->doorbell_index);
4947                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4948                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4949                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4950                         mqd->cp_hqd_pq_doorbell_control = tmp;
4951
4952                 } else {
4953                         mqd->cp_hqd_pq_doorbell_control = 0;
4954                 }
4955                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4956                        mqd->cp_hqd_pq_doorbell_control);
4957
4958                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4959                 ring->wptr = 0;
4960                 mqd->cp_hqd_pq_wptr = ring->wptr;
4961                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4962                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4963
4964                 /* set the vmid for the queue */
4965                 mqd->cp_hqd_vmid = 0;
4966                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4967
4968                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4969                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4970                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4971                 mqd->cp_hqd_persistent_state = tmp;
4972                 if (adev->asic_type == CHIP_STONEY ||
4973                         adev->asic_type == CHIP_POLARIS11 ||
4974                         adev->asic_type == CHIP_POLARIS10) {
4975                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4976                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4977                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4978                 }
4979
4980                 /* activate the queue */
4981                 mqd->cp_hqd_active = 1;
4982                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4983
4984                 vi_srbm_select(adev, 0, 0, 0, 0);
4985                 mutex_unlock(&adev->srbm_mutex);
4986
4987                 amdgpu_bo_kunmap(ring->mqd_obj);
4988                 amdgpu_bo_unreserve(ring->mqd_obj);
4989         }
4990
4991         if (use_doorbell) {
4992                 tmp = RREG32(mmCP_PQ_STATUS);
4993                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4994                 WREG32(mmCP_PQ_STATUS, tmp);
4995         }
4996
4997         gfx_v8_0_cp_compute_enable(adev, true);
4998
4999         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5000                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5001
5002                 ring->ready = true;
5003                 r = amdgpu_ring_test_ring(ring);
5004                 if (r)
5005                         ring->ready = false;
5006         }
5007
5008         return 0;
5009 }
5010
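     /* Load the CP microcode (or, with SMU firmware loading, verify it has
      * finished loading) and bring up the GFX and compute rings.
      */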
5011 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5012 {
5013         int r;
5014
5015         if (!(adev->flags & AMD_IS_APU))
5016                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5017
5018         if (!adev->pp_enabled) {
5019                 if (!adev->firmware.smu_load) {
5020                         /* legacy firmware loading */
5021                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5022                         if (r)
5023                                 return r;
5024
5025                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5026                         if (r)
5027                                 return r;
5028                 } else {
5029                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5030                                                         AMDGPU_UCODE_ID_CP_CE);
5031                         if (r)
5032                                 return -EINVAL;
5033
5034                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5035                                                         AMDGPU_UCODE_ID_CP_PFP);
5036                         if (r)
5037                                 return -EINVAL;
5038
5039                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5040                                                         AMDGPU_UCODE_ID_CP_ME);
5041                         if (r)
5042                                 return -EINVAL;
5043
5044                         if (adev->asic_type == CHIP_TOPAZ) {
5045                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5046                                 if (r)
5047                                         return r;
5048                         } else {
5049                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5050                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5051                                 if (r)
5052                                         return -EINVAL;
5053                         }
5054                 }
5055         }
5056
5057         r = gfx_v8_0_cp_gfx_resume(adev);
5058         if (r)
5059                 return r;
5060
5061         r = gfx_v8_0_cp_compute_resume(adev);
5062         if (r)
5063                 return r;
5064
5065         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5066
5067         return 0;
5068 }
5069
5070 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5071 {
5072         gfx_v8_0_cp_gfx_enable(adev, enable);
5073         gfx_v8_0_cp_compute_enable(adev, enable);
5074 }
5075
5076 static int gfx_v8_0_hw_init(void *handle)
5077 {
5078         int r;
5079         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5080
5081         gfx_v8_0_init_golden_registers(adev);
5082         gfx_v8_0_gpu_init(adev);
5083
5084         r = gfx_v8_0_rlc_resume(adev);
5085         if (r)
5086                 return r;
5087
5088         r = gfx_v8_0_cp_resume(adev);
5089
5090         return r;
5091 }
5092
5093 static int gfx_v8_0_hw_fini(void *handle)
5094 {
5095         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5096
5097         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5098         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5099         gfx_v8_0_cp_enable(adev, false);
5100         gfx_v8_0_rlc_stop(adev);
5101         gfx_v8_0_cp_compute_fini(adev);
5102
5103         amdgpu_set_powergating_state(adev,
5104                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5105
5106         return 0;
5107 }
5108
5109 static int gfx_v8_0_suspend(void *handle)
5110 {
5111         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112
5113         return gfx_v8_0_hw_fini(adev);
5114 }
5115
5116 static int gfx_v8_0_resume(void *handle)
5117 {
5118         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5119
5120         return gfx_v8_0_hw_init(adev);
5121 }
5122
5123 static bool gfx_v8_0_is_idle(void *handle)
5124 {
5125         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5126
5127         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5128                 return false;
5129         else
5130                 return true;
5131 }
5132
5133 static int gfx_v8_0_wait_for_idle(void *handle)
5134 {
5135         unsigned i;
5136         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5137
5138         for (i = 0; i < adev->usec_timeout; i++) {
5139                 if (gfx_v8_0_is_idle(handle))
5140                         return 0;
5141
5142                 udelay(1);
5143         }
5144         return -ETIMEDOUT;
5145 }
5146
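     /* Inspect the GRBM/SRBM status registers and build soft-reset masks for
      * any engines reported busy; the masks are stashed in adev->gfx for the
      * pre/soft/post reset handlers below.
      */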
5147 static bool gfx_v8_0_check_soft_reset(void *handle)
5148 {
5149         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5150         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5151         u32 tmp;
5152
5153         /* GRBM_STATUS */
5154         tmp = RREG32(mmGRBM_STATUS);
5155         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5156                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5157                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5158                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5159                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5160                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5161                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5162                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5163                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5164                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5165                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5166                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5167                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5168         }
5169
5170         /* GRBM_STATUS2 */
5171         tmp = RREG32(mmGRBM_STATUS2);
5172         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5173                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5174                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5175
5176         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5177             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5178             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5179                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5180                                                 SOFT_RESET_CPF, 1);
5181                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5182                                                 SOFT_RESET_CPC, 1);
5183                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5184                                                 SOFT_RESET_CPG, 1);
5185                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5186                                                 SOFT_RESET_GRBM, 1);
5187         }
5188
5189         /* SRBM_STATUS */
5190         tmp = RREG32(mmSRBM_STATUS);
5191         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5192                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5193                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5194         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5195                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5196                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5197
5198         if (grbm_soft_reset || srbm_soft_reset) {
5199                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5200                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5201                 return true;
5202         } else {
5203                 adev->gfx.grbm_soft_reset = 0;
5204                 adev->gfx.srbm_soft_reset = 0;
5205                 return false;
5206         }
5207 }
5208
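     /* Ask the CP to dequeue the ring's hardware queue and busy-wait until
      * CP_HQD_ACTIVE reports the queue as inactive.
      */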
5209 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5210                                   struct amdgpu_ring *ring)
5211 {
5212         int i;
5213
5214         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5215         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5216                 u32 tmp;
5217                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5218                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5219                                     DEQUEUE_REQ, 2);
5220                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5221                 for (i = 0; i < adev->usec_timeout; i++) {
5222                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5223                                 break;
5224                         udelay(1);
5225                 }
5226         }
5227 }
5228
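     /* Quiesce the hardware before a soft reset: stop the RLC and halt any
      * GFX/compute CP engines flagged for reset, draining their HQDs first.
      */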
5229 static int gfx_v8_0_pre_soft_reset(void *handle)
5230 {
5231         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5232         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5233
5234         if ((!adev->gfx.grbm_soft_reset) &&
5235             (!adev->gfx.srbm_soft_reset))
5236                 return 0;
5237
5238         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5239         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5240
5241         /* stop the rlc */
5242         gfx_v8_0_rlc_stop(adev);
5243
5244         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5245             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5246                 /* Disable GFX parsing/prefetching */
5247                 gfx_v8_0_cp_gfx_enable(adev, false);
5248
5249         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5250             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5251             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5252             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5253                 int i;
5254
5255                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5256                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5257
5258                         gfx_v8_0_inactive_hqd(adev, ring);
5259                 }
5260                 /* Disable MEC parsing/prefetching */
5261                 gfx_v8_0_cp_compute_enable(adev, false);
5262         }
5263
5264         return 0;
5265 }
5266
5267 static int gfx_v8_0_soft_reset(void *handle)
5268 {
5269         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5270         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5271         u32 tmp;
5272
5273         if ((!adev->gfx.grbm_soft_reset) &&
5274             (!adev->gfx.srbm_soft_reset))
5275                 return 0;
5276
5277         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5278         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5279
5280         if (grbm_soft_reset || srbm_soft_reset) {
5281                 tmp = RREG32(mmGMCON_DEBUG);
5282                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5283                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5284                 WREG32(mmGMCON_DEBUG, tmp);
5285                 udelay(50);
5286         }
5287
5288         if (grbm_soft_reset) {
5289                 tmp = RREG32(mmGRBM_SOFT_RESET);
5290                 tmp |= grbm_soft_reset;
5291                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5292                 WREG32(mmGRBM_SOFT_RESET, tmp);
5293                 tmp = RREG32(mmGRBM_SOFT_RESET);
5294
5295                 udelay(50);
5296
5297                 tmp &= ~grbm_soft_reset;
5298                 WREG32(mmGRBM_SOFT_RESET, tmp);
5299                 tmp = RREG32(mmGRBM_SOFT_RESET);
5300         }
5301
5302         if (srbm_soft_reset) {
5303                 tmp = RREG32(mmSRBM_SOFT_RESET);
5304                 tmp |= srbm_soft_reset;
5305                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5306                 WREG32(mmSRBM_SOFT_RESET, tmp);
5307                 tmp = RREG32(mmSRBM_SOFT_RESET);
5308
5309                 udelay(50);
5310
5311                 tmp &= ~srbm_soft_reset;
5312                 WREG32(mmSRBM_SOFT_RESET, tmp);
5313                 tmp = RREG32(mmSRBM_SOFT_RESET);
5314         }
5315
5316         if (grbm_soft_reset || srbm_soft_reset) {
5317                 tmp = RREG32(mmGMCON_DEBUG);
5318                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5319                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5320                 WREG32(mmGMCON_DEBUG, tmp);
5321         }
5322
5323         /* Wait a little for things to settle down */
5324         udelay(50);
5325
5326         return 0;
5327 }
5328
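/**
 * gfx_v8_0_init_hqd - clear the HQD state of a compute ring
 *
 * @adev: amdgpu_device pointer
 * @ring: amdgpu_ring pointer
 *
 * Selects the ring's me/pipe/queue via SRBM and clears the dequeue
 * request and the PQ read/write pointers.
 */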
5329 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5330                               struct amdgpu_ring *ring)
5331 {
5332         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5333         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5334         WREG32(mmCP_HQD_PQ_RPTR, 0);
5335         WREG32(mmCP_HQD_PQ_WPTR, 0);
5336         vi_srbm_select(adev, 0, 0, 0, 0);
5337 }
5338
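/**
 * gfx_v8_0_post_soft_reset - bring the GFX block back up after a soft reset
 *
 * @handle: amdgpu_device pointer
 *
 * Resumes the gfx ring and/or reinitializes the compute HQDs and resumes
 * the compute rings, depending on which soft reset bits were requested,
 * then restarts the RLC.
 */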
5339 static int gfx_v8_0_post_soft_reset(void *handle)
5340 {
5341         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5342         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5343
5344         if ((!adev->gfx.grbm_soft_reset) &&
5345             (!adev->gfx.srbm_soft_reset))
5346                 return 0;
5347
5348         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5349         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5350
5351         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5352             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5353                 gfx_v8_0_cp_gfx_resume(adev);
5354
5355         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5356             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5357             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5358             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5359                 int i;
5360
5361                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5362                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5363
5364                         gfx_v8_0_init_hqd(adev, ring);
5365                 }
5366                 gfx_v8_0_cp_compute_resume(adev);
5367         }
5368         gfx_v8_0_rlc_start(adev);
5369
5370         return 0;
5371 }
5372
5373 /**
5374  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5375  *
5376  * @adev: amdgpu_device pointer
5377  *
5378  * Fetches a GPU clock counter snapshot.
5379  * Returns the 64-bit clock counter snapshot.
5380  */
5381 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5382 {
5383         uint64_t clock;
5384
5385         mutex_lock(&adev->gfx.gpu_clock_mutex);
5386         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5387         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5388                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5389         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5390         return clock;
5391 }
5392
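/* Program the per-VMID GDS base/size, GWS and OA allocations for @vmid
 * with a series of WRITE_DATA packets on the given ring.
 */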
5393 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5394                                           uint32_t vmid,
5395                                           uint32_t gds_base, uint32_t gds_size,
5396                                           uint32_t gws_base, uint32_t gws_size,
5397                                           uint32_t oa_base, uint32_t oa_size)
5398 {
5399         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5400         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5401
5402         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5403         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5404
5405         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5406         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5407
5408         /* GDS Base */
5409         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5410         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5411                                 WRITE_DATA_DST_SEL(0)));
5412         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5413         amdgpu_ring_write(ring, 0);
5414         amdgpu_ring_write(ring, gds_base);
5415
5416         /* GDS Size */
5417         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5418         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5419                                 WRITE_DATA_DST_SEL(0)));
5420         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5421         amdgpu_ring_write(ring, 0);
5422         amdgpu_ring_write(ring, gds_size);
5423
5424         /* GWS */
5425         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5426         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5427                                 WRITE_DATA_DST_SEL(0)));
5428         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5429         amdgpu_ring_write(ring, 0);
5430         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5431
5432         /* OA */
5433         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5434         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5435                                 WRITE_DATA_DST_SEL(0)));
5436         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5437         amdgpu_ring_write(ring, 0);
5438         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5439 }
5440
5441 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5442         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5443         .select_se_sh = &gfx_v8_0_select_se_sh,
5444 };
5445
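/* Set the gfx and compute ring counts and install the gfx, ring, irq,
 * GDS and RLC callbacks for this IP block.
 */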
5446 static int gfx_v8_0_early_init(void *handle)
5447 {
5448         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5449
5450         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5451         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5452         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5453         gfx_v8_0_set_ring_funcs(adev);
5454         gfx_v8_0_set_irq_funcs(adev);
5455         gfx_v8_0_set_gds_init(adev);
5456         gfx_v8_0_set_rlc_funcs(adev);
5457
5458         return 0;
5459 }
5460
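/* Enable the privileged register and privileged instruction fault
 * interrupts, run the EDC GPR workarounds (they need the IB pool, hence
 * late init) and request GFX powergating.
 */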
5461 static int gfx_v8_0_late_init(void *handle)
5462 {
5463         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5464         int r;
5465
5466         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5467         if (r)
5468                 return r;
5469
5470         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5471         if (r)
5472                 return r;
5473
5474         /* requires IBs so do in late init after IB pool is initialized */
5475         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5476         if (r)
5477                 return r;
5478
5479         amdgpu_set_powergating_state(adev,
5480                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5481
5482         return 0;
5483 }
5484
5485 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5486                                                        bool enable)
5487 {
5488         if (adev->asic_type == CHIP_POLARIS11)
5489                 /* Send msg to SMU via Powerplay */
5490                 amdgpu_set_powergating_state(adev,
5491                                              AMD_IP_BLOCK_TYPE_SMC,
5492                                              enable ?
5493                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5494
5495         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5496 }
5497
5498 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5499                                                         bool enable)
5500 {
5501         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5502 }
5503
5504 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5505                 bool enable)
5506 {
5507         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5508 }
5509
5510 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5511                                           bool enable)
5512 {
5513         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5514 }
5515
5516 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5517                                                 bool enable)
5518 {
5519         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5520
5521         /* Read any GFX register to wake up GFX. */
5522         if (!enable)
5523                 RREG32(mmDB_RENDER_CONTROL);
5524 }
5525
5526 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5527                                           bool enable)
5528 {
5529         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5530                 cz_enable_gfx_cg_power_gating(adev, true);
5531                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5532                         cz_enable_gfx_pipeline_power_gating(adev, true);
5533         } else {
5534                 cz_enable_gfx_cg_power_gating(adev, false);
5535                 cz_enable_gfx_pipeline_power_gating(adev, false);
5536         }
5537 }
5538
5539 static int gfx_v8_0_set_powergating_state(void *handle,
5540                                           enum amd_powergating_state state)
5541 {
5542         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5543         bool enable = (state == AMD_PG_STATE_GATE);
5544
5545         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5546                 return 0;
5547
5548         switch (adev->asic_type) {
5549         case CHIP_CARRIZO:
5550         case CHIP_STONEY:
5551                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5552                         cz_update_gfx_cg_power_gating(adev, enable);
5553
5554                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5555                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5556                 else
5557                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5558
5559                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5560                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5561                 else
5562                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5563                 break;
5564         case CHIP_POLARIS11:
5565                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5566                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5567                 else
5568                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5569
5570                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5571                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5572                 else
5573                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5574
5575                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5576                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5577                 else
5578                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5579                 break;
5580         default:
5581                 break;
5582         }
5583
5584         return 0;
5585 }
5586
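/* Broadcast a BPM command to every CU and non-CU SERDES master: select
 * all SEs/SHs, open up the master masks and program RLC_SERDES_WR_CTRL
 * with the given register address and command.
 */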
5587 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5588                                      uint32_t reg_addr, uint32_t cmd)
5589 {
5590         uint32_t data;
5591
5592         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5593
5594         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5595         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5596
5597         data = RREG32(mmRLC_SERDES_WR_CTRL);
5598         if (adev->asic_type == CHIP_STONEY)
5599                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5600                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5601                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5602                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5603                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5604                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5605                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5606                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5607                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5608         else
5609                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5610                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5611                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5612                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5613                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5614                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5615                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5616                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5617                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5618                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5619                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5620         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5621                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5622                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5623                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5624
5625         WREG32(mmRLC_SERDES_WR_CTRL, data);
5626 }
5627
5628 #define MSG_ENTER_RLC_SAFE_MODE     1
5629 #define MSG_EXIT_RLC_SAFE_MODE      0
5630 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5631 #define RLC_GPR_REG2__REQ__SHIFT 0
5632 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5633 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5634
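/* Request RLC safe mode on CZ/ST by sending the ENTER message through
 * RLC_GPR_REG2 (only if the RLC F32 core is running and the relevant
 * CG/PG features are enabled), then wait for the GFX clock/power status
 * bits to assert and for the request bit to clear.
 */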
5635 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5636 {
5637         u32 data = 0;
5638         unsigned i;
5639
5640         data = RREG32(mmRLC_CNTL);
5641         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5642                 return;
5643
5644         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5645             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5646                                AMD_PG_SUPPORT_GFX_DMG))) {
5647                 data |= RLC_GPR_REG2__REQ_MASK;
5648                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5649                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5650                 WREG32(mmRLC_GPR_REG2, data);
5651
5652                 for (i = 0; i < adev->usec_timeout; i++) {
5653                         if ((RREG32(mmRLC_GPM_STAT) &
5654                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5655                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5656                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5657                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5658                                 break;
5659                         udelay(1);
5660                 }
5661
5662                 for (i = 0; i < adev->usec_timeout; i++) {
5663                         if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5664                                 break;
5665                         udelay(1);
5666                 }
5667                 adev->gfx.rlc.in_safe_mode = true;
5668         }
5669 }
5670
5671 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5672 {
5673         u32 data;
5674         unsigned i;
5675
5676         data = RREG32(mmRLC_CNTL);
5677         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5678                 return;
5679
5680         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5681             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5682                                AMD_PG_SUPPORT_GFX_DMG))) {
5683                 data |= RLC_GPR_REG2__REQ_MASK;
5684                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5685                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5686                 WREG32(mmRLC_GPR_REG2, data);
5687                 adev->gfx.rlc.in_safe_mode = false;
5688         }
5689
5690         for (i = 0; i < adev->usec_timeout; i++) {
5691                 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5692                         break;
5693                 udelay(1);
5694         }
5695 }
5696
5697 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5698 {
5699         u32 data;
5700         unsigned i;
5701
5702         data = RREG32(mmRLC_CNTL);
5703         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5704                 return;
5705
5706         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5707                 data |= RLC_SAFE_MODE__CMD_MASK;
5708                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5709                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5710                 WREG32(mmRLC_SAFE_MODE, data);
5711
5712                 for (i = 0; i < adev->usec_timeout; i++) {
5713                         if ((RREG32(mmRLC_GPM_STAT) &
5714                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5715                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5716                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5717                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5718                                 break;
5719                         udelay(1);
5720                 }
5721
5722                 for (i = 0; i < adev->usec_timeout; i++) {
5723                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5724                                 break;
5725                         udelay(1);
5726                 }
5727                 adev->gfx.rlc.in_safe_mode = true;
5728         }
5729 }
5730
5731 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5732 {
5733         u32 data = 0;
5734         unsigned i;
5735
5736         data = RREG32(mmRLC_CNTL);
5737         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5738                 return;
5739
5740         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5741                 if (adev->gfx.rlc.in_safe_mode) {
5742                         data |= RLC_SAFE_MODE__CMD_MASK;
5743                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5744                         WREG32(mmRLC_SAFE_MODE, data);
5745                         adev->gfx.rlc.in_safe_mode = false;
5746                 }
5747         }
5748
5749         for (i = 0; i < adev->usec_timeout; i++) {
5750                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5751                         break;
5752                 udelay(1);
5753         }
5754 }
5755
5756 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5757 {
5758         adev->gfx.rlc.in_safe_mode = true;
5759 }
5760
5761 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5762 {
5763         adev->gfx.rlc.in_safe_mode = false;
5764 }
5765
5766 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5767         .enter_safe_mode = cz_enter_rlc_safe_mode,
5768         .exit_safe_mode = cz_exit_rlc_safe_mode
5769 };
5770
5771 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5772         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5773         .exit_safe_mode = iceland_exit_rlc_safe_mode
5774 };
5775
5776 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5777         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5778         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5779 };
5780
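/* Enable or disable medium grain clock gating (MGCG), the related
 * memory light sleep (MGLS) and the CGTS shader gating, following the
 * numbered sequence below and waiting for the RLC SERDES masters to go
 * idle between steps.  Runs with the RLC in safe mode.
 */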
5781 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5782                                                       bool enable)
5783 {
5784         uint32_t temp, data;
5785
5786         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5787
5788         /* It is disabled by HW by default */
5789         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5790                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5791                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5792                                 /* 1 - RLC memory Light sleep */
5793                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5794
5795                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5796                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5797                 }
5798
5799                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5800                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5801                 if (adev->flags & AMD_IS_APU)
5802                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5803                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5804                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5805                 else
5806                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5807                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5808                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5809                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5810
5811                 if (temp != data)
5812                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5813
5814                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5815                 gfx_v8_0_wait_for_rlc_serdes(adev);
5816
5817                 /* 5 - clear mgcg override */
5818                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5819
5820                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5821                         /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5822                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5823                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5824                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5825                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5826                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5827                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5828                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5829                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5830                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5831                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5832                         if (temp != data)
5833                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5834                 }
5835                 udelay(50);
5836
5837                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5838                 gfx_v8_0_wait_for_rlc_serdes(adev);
5839         } else {
5840                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5841                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5842                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5843                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5844                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5845                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5846                 if (temp != data)
5847                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5848
5849                 /* 2 - disable MGLS in RLC */
5850                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5851                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5852                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5853                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5854                 }
5855
5856                 /* 3 - disable MGLS in CP */
5857                 data = RREG32(mmCP_MEM_SLP_CNTL);
5858                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5859                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5860                         WREG32(mmCP_MEM_SLP_CNTL, data);
5861                 }
5862
5863                 /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
5864                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5865                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5866                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5867                 if (temp != data)
5868                         WREG32(mmCGTS_SM_CTRL_REG, data);
5869
5870                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5871                 gfx_v8_0_wait_for_rlc_serdes(adev);
5872
5873                 /* 6 - set mgcg override */
5874                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5875
5876                 udelay(50);
5877
5878                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5879                 gfx_v8_0_wait_for_rlc_serdes(adev);
5880         }
5881
5882         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5883 }
5884
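/* Enable or disable coarse grain clock gating (CGCG) and CGLS via the
 * RLC MGCG override and RLC_CGCG_CGLS_CTRL, waiting for the RLC SERDES
 * masters to go idle between steps.  Runs with the RLC in safe mode.
 */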
5885 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5886                                                       bool enable)
5887 {
5888         uint32_t temp, temp1, data, data1;
5889
5890         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5891
5892         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5893
5894         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5895                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5896                  * Cmp_busy/GFX_Idle interrupts
5897                  */
5898                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5899
5900                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5901                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5902                 if (temp1 != data1)
5903                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5904
5905                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5906                 gfx_v8_0_wait_for_rlc_serdes(adev);
5907
5908                 /* 3 - clear cgcg override */
5909                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5910
5911                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5912                 gfx_v8_0_wait_for_rlc_serdes(adev);
5913
5914                 /* 4 - write cmd to set CGLS */
5915                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5916
5917                 /* 5 - enable cgcg */
5918                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5919
5920                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5921                         /* enable cgls*/
5922                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5923
5924                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5925                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5926
5927                         if (temp1 != data1)
5928                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5929                 } else {
5930                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5931                 }
5932
5933                 if (temp != data)
5934                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5935         } else {
5936                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5937                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5938
5939                 /* TEST CGCG */
5940                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5941                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5942                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5943                 if (temp1 != data1)
5944                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5945
5946                 /* read gfx register to wake up cgcg */
5947                 RREG32(mmCB_CGTT_SCLK_CTRL);
5948                 RREG32(mmCB_CGTT_SCLK_CTRL);
5949                 RREG32(mmCB_CGTT_SCLK_CTRL);
5950                 RREG32(mmCB_CGTT_SCLK_CTRL);
5951
5952                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5953                 gfx_v8_0_wait_for_rlc_serdes(adev);
5954
5955                 /* write cmd to set CGCG override */
5956                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5957
5958                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5959                 gfx_v8_0_wait_for_rlc_serdes(adev);
5960
5961                 /* write cmd to Clear CGLS */
5962                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5963
5964                 /* disable cgcg, cgls should be disabled too. */
5965                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5966                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5967                 if (temp != data)
5968                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5969         }
5970
5971         gfx_v8_0_wait_for_rlc_serdes(adev);
5972
5973         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5974 }

5975 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5976                                             bool enable)
5977 {
5978         if (enable) {
5979                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5980                  * ===  MGCG + MGLS + TS(CG/LS) ===
5981                  */
5982                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5983                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5984         } else {
5985                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5986                  * ===  CGCG + CGLS ===
5987                  */
5988                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5989                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5990         }
5991         return 0;
5992 }
5993
5994 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5995                                           enum amd_clockgating_state state)
5996 {
5997         uint32_t msg_id, pp_state;
5998         void *pp_handle = adev->powerplay.pp_handle;
5999
6000         if (state == AMD_CG_STATE_UNGATE)
6001                 pp_state = 0;
6002         else
6003                 pp_state = PP_STATE_CG | PP_STATE_LS;
6004
6005         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6006                         PP_BLOCK_GFX_CG,
6007                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6008                         pp_state);
6009         amd_set_clockgating_by_smu(pp_handle, msg_id);
6010
6011         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6012                         PP_BLOCK_GFX_MG,
6013                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6014                         pp_state);
6015         amd_set_clockgating_by_smu(pp_handle, msg_id);
6016
6017         return 0;
6018 }
6019
6020 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6021                                           enum amd_clockgating_state state)
6022 {
6023         uint32_t msg_id, pp_state;
6024         void *pp_handle = adev->powerplay.pp_handle;
6025
6026         if (state == AMD_CG_STATE_UNGATE)
6027                 pp_state = 0;
6028         else
6029                 pp_state = PP_STATE_CG | PP_STATE_LS;
6030
6031         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6032                         PP_BLOCK_GFX_CG,
6033                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6034                         pp_state);
6035         amd_set_clockgating_by_smu(pp_handle, msg_id);
6036
6037         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6038                         PP_BLOCK_GFX_3D,
6039                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6040                         pp_state);
6041         amd_set_clockgating_by_smu(pp_handle, msg_id);
6042
6043         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6044                         PP_BLOCK_GFX_MG,
6045                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6046                         pp_state);
6047         amd_set_clockgating_by_smu(pp_handle, msg_id);
6048
6049         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6050                         PP_BLOCK_GFX_RLC,
6051                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6052                         pp_state);
6053         amd_set_clockgating_by_smu(pp_handle, msg_id);
6054
6055         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6056                         PP_BLOCK_GFX_CP,
6057                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6058                         pp_state);
6059         amd_set_clockgating_by_smu(pp_handle, msg_id);
6060
6061         return 0;
6062 }
6063
6064 static int gfx_v8_0_set_clockgating_state(void *handle,
6065                                           enum amd_clockgating_state state)
6066 {
6067         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6068
6069         switch (adev->asic_type) {
6070         case CHIP_FIJI:
6071         case CHIP_CARRIZO:
6072         case CHIP_STONEY:
6073                 gfx_v8_0_update_gfx_clock_gating(adev,
6074                                                  state == AMD_CG_STATE_GATE);
6075                 break;
6076         case CHIP_TONGA:
6077                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6078                 break;
6079         case CHIP_POLARIS10:
6080         case CHIP_POLARIS11:
6081                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6082                 break;
6083         default:
6084                 break;
6085         }
6086         return 0;
6087 }
6088
6089 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6090 {
6091         return ring->adev->wb.wb[ring->rptr_offs];
6092 }
6093
6094 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6095 {
6096         struct amdgpu_device *adev = ring->adev;
6097
6098         if (ring->use_doorbell)
6099                 /* XXX check if swapping is necessary on BE */
6100                 return ring->adev->wb.wb[ring->wptr_offs];
6101         else
6102                 return RREG32(mmCP_RB0_WPTR);
6103 }
6104
6105 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6106 {
6107         struct amdgpu_device *adev = ring->adev;
6108
6109         if (ring->use_doorbell) {
6110                 /* XXX check if swapping is necessary on BE */
6111                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6112                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6113         } else {
6114                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6115                 (void)RREG32(mmCP_RB0_WPTR);
6116         }
6117 }
6118
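/* Emit a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE for the mask of the CP engine (and pipe) backing
 * this ring.
 */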
6119 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6120 {
6121         u32 ref_and_mask, reg_mem_engine;
6122
6123         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6124                 switch (ring->me) {
6125                 case 1:
6126                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6127                         break;
6128                 case 2:
6129                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6130                         break;
6131                 default:
6132                         return;
6133                 }
6134                 reg_mem_engine = 0;
6135         } else {
6136                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6137                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6138         }
6139
6140         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6141         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6142                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6143                                  reg_mem_engine));
6144         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6145         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6146         amdgpu_ring_write(ring, ref_and_mask);
6147         amdgpu_ring_write(ring, ref_and_mask);
6148         amdgpu_ring_write(ring, 0x20); /* poll interval */
6149 }
6150
6151 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6152 {
6153         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6154         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6155                                  WRITE_DATA_DST_SEL(0) |
6156                                  WR_CONFIRM));
6157         amdgpu_ring_write(ring, mmHDP_DEBUG0);
6158         amdgpu_ring_write(ring, 0);
6159         amdgpu_ring_write(ring, 1);
6160
6161 }
6162
6163 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6164                                       struct amdgpu_ib *ib,
6165                                       unsigned vm_id, bool ctx_switch)
6166 {
6167         u32 header, control = 0;
6168
6169         if (ib->flags & AMDGPU_IB_FLAG_CE)
6170                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6171         else
6172                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6173
6174         control |= ib->length_dw | (vm_id << 24);
6175
6176         amdgpu_ring_write(ring, header);
6177         amdgpu_ring_write(ring,
6178 #ifdef __BIG_ENDIAN
6179                           (2 << 0) |
6180 #endif
6181                           (ib->gpu_addr & 0xFFFFFFFC));
6182         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6183         amdgpu_ring_write(ring, control);
6184 }
6185
6186 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6187                                           struct amdgpu_ib *ib,
6188                                           unsigned vm_id, bool ctx_switch)
6189 {
6190         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6191
6192         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6193         amdgpu_ring_write(ring,
6194 #ifdef __BIG_ENDIAN
6195                                 (2 << 0) |
6196 #endif
6197                                 (ib->gpu_addr & 0xFFFFFFFC));
6198         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6199         amdgpu_ring_write(ring, control);
6200 }
6201
6202 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6203                                          u64 seq, unsigned flags)
6204 {
6205         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6206         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6207
6208         /* EVENT_WRITE_EOP - flush caches, send int */
6209         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6210         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6211                                  EOP_TC_ACTION_EN |
6212                                  EOP_TC_WB_ACTION_EN |
6213                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6214                                  EVENT_INDEX(5)));
6215         amdgpu_ring_write(ring, addr & 0xfffffffc);
6216         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6217                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6218         amdgpu_ring_write(ring, lower_32_bits(seq));
6219         amdgpu_ring_write(ring, upper_32_bits(seq));
6220
6221 }
6222
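/* Emit a WAIT_REG_MEM packet that stalls the ring (PFP on gfx, ME on
 * compute) until the last synced fence sequence number shows up at the
 * fence address in memory.
 */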
6223 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6224 {
6225         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6226         uint32_t seq = ring->fence_drv.sync_seq;
6227         uint64_t addr = ring->fence_drv.gpu_addr;
6228
6229         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6230         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6231                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6232                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6233         amdgpu_ring_write(ring, addr & 0xfffffffc);
6234         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6235         amdgpu_ring_write(ring, seq);
6236         amdgpu_ring_write(ring, 0xffffffff);
6237         amdgpu_ring_write(ring, 4); /* poll interval */
6238 }
6239
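/* Emit the VM flush sequence: write the new page table base for this
 * VMID, request a VM invalidation and wait on VM_INVALIDATE_REQUEST.
 * On the gfx ring, NOP padding and a PFP_SYNC_ME keep the CE and PFP
 * in step with the flush.
 */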
6240 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6241                                         unsigned vm_id, uint64_t pd_addr)
6242 {
6243         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6244
6245         /* GFX8 emits 128 dw nop to prevent the DE from doing vm_flush before the CE finishes CEIB */
6246         if (usepfp)
6247                 amdgpu_ring_insert_nop(ring, 128);
6248
6249         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6250         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6251                                  WRITE_DATA_DST_SEL(0)) |
6252                                  WR_CONFIRM);
6253         if (vm_id < 8) {
6254                 amdgpu_ring_write(ring,
6255                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6256         } else {
6257                 amdgpu_ring_write(ring,
6258                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6259         }
6260         amdgpu_ring_write(ring, 0);
6261         amdgpu_ring_write(ring, pd_addr >> 12);
6262
6263         /* bits 0-15 are the VM contexts 0-15 */
6264         /* invalidate the cache */
6265         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6266         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6267                                  WRITE_DATA_DST_SEL(0)));
6268         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6269         amdgpu_ring_write(ring, 0);
6270         amdgpu_ring_write(ring, 1 << vm_id);
6271
6272         /* wait for the invalidate to complete */
6273         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6274         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6275                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6276                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6277         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6278         amdgpu_ring_write(ring, 0);
6279         amdgpu_ring_write(ring, 0); /* ref */
6280         amdgpu_ring_write(ring, 0); /* mask */
6281         amdgpu_ring_write(ring, 0x20); /* poll interval */
6282
6283         /* compute doesn't have PFP */
6284         if (usepfp) {
6285                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6286                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6287                 amdgpu_ring_write(ring, 0x0);
6288                 /* GFX8 emits 128 dw nop to prevent the CE from accessing the VM before vm_flush finishes */
6289                 amdgpu_ring_insert_nop(ring, 128);
6290         }
6291 }
6292
6293 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6294 {
6295         return ring->adev->wb.wb[ring->wptr_offs];
6296 }
6297
6298 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6299 {
6300         struct amdgpu_device *adev = ring->adev;
6301
6302         /* XXX check if swapping is necessary on BE */
6303         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6304         WDOORBELL32(ring->doorbell_index, ring->wptr);
6305 }
6306
6307 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6308                                              u64 addr, u64 seq,
6309                                              unsigned flags)
6310 {
6311         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6312         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6313
6314         /* RELEASE_MEM - flush caches, send int */
6315         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6316         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6317                                  EOP_TC_ACTION_EN |
6318                                  EOP_TC_WB_ACTION_EN |
6319                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6320                                  EVENT_INDEX(5)));
6321         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6322         amdgpu_ring_write(ring, addr & 0xfffffffc);
6323         amdgpu_ring_write(ring, upper_32_bits(addr));
6324         amdgpu_ring_write(ring, lower_32_bits(seq));
6325         amdgpu_ring_write(ring, upper_32_bits(seq));
6326 }
6327
6328 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6329 {
6330         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6331         amdgpu_ring_write(ring, 0);
6332 }
6333
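/* Emit a CONTEXT_CONTROL packet whose load bits depend on whether a
 * context switch and/or a preamble IB is flagged for this submission.
 */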
6334 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6335 {
6336         uint32_t dw2 = 0;
6337
6338         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6339         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6340                 /* set load_global_config & load_global_uconfig */
6341                 dw2 |= 0x8001;
6342                 /* set load_cs_sh_regs */
6343                 dw2 |= 0x01000000;
6344                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6345                 dw2 |= 0x10002;
6346
6347                 /* set load_ce_ram if a preamble is present */
6348                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6349                         dw2 |= 0x10000000;
6350         } else {
6351                 /* still load_ce_ram if this is the first time a preamble is
6352                  * presented, even though no context switch happens.
6353                  */
6354                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6355                         dw2 |= 0x10000000;
6356         }
6357
6358         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6359         amdgpu_ring_write(ring, dw2);
6360         amdgpu_ring_write(ring, 0);
6361 }
6362
6363 static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
6364 {
6365         return
6366                 4; /* gfx_v8_0_ring_emit_ib_gfx */
6367 }
6368
6369 static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
6370 {
6371         return
6372                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6373                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6374                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6375                 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6376                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6377                 256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6378                 2 + /* gfx_v8_ring_emit_sb */
6379                 3; /* gfx_v8_ring_emit_cntxcntl */
6380 }
6381
6382 static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
6383 {
6384         return
6385                 4; /* gfx_v8_0_ring_emit_ib_compute */
6386 }
6387
6388 static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
6389 {
6390         return
6391                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6392                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6393                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6394                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6395                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6396                 7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6397 }
6398
6399 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6400                                                  enum amdgpu_interrupt_state state)
6401 {
6402         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6403                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6404 }
6405
6406 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6407                                                      int me, int pipe,
6408                                                      enum amdgpu_interrupt_state state)
6409 {
6410         /*
6411          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6412          * handles the setting of interrupts for this specific pipe. All other
6413          * pipes' interrupts are set by amdkfd.
6414          */
6415
6416         if (me == 1) {
6417                 switch (pipe) {
6418                 case 0:
6419                         break;
6420                 default:
6421                         DRM_DEBUG("invalid pipe %d\n", pipe);
6422                         return;
6423                 }
6424         } else {
6425                 DRM_DEBUG("invalid me %d\n", me);
6426                 return;
6427         }
6428
6429         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6430                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6431 }
6432
6433 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6434                                              struct amdgpu_irq_src *source,
6435                                              unsigned type,
6436                                              enum amdgpu_interrupt_state state)
6437 {
6438         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6439                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6440
6441         return 0;
6442 }
6443
6444 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6445                                               struct amdgpu_irq_src *source,
6446                                               unsigned type,
6447                                               enum amdgpu_interrupt_state state)
6448 {
6449         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6450                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6451
6452         return 0;
6453 }
6454
6455 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6456                                             struct amdgpu_irq_src *src,
6457                                             unsigned type,
6458                                             enum amdgpu_interrupt_state state)
6459 {
6460         switch (type) {
6461         case AMDGPU_CP_IRQ_GFX_EOP:
6462                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6463                 break;
6464         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6465                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6466                 break;
6467         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6468                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6469                 break;
6470         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6471                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6472                 break;
6473         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6474                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6475                 break;
6476         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6477                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6478                 break;
6479         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6480                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6481                 break;
6482         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6483                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6484                 break;
6485         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6486                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6487                 break;
6488         default:
6489                 break;
6490         }
6491         return 0;
6492 }
6493
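/* CP EOP interrupt handler: decode me/pipe/queue from the ring_id in
 * the IV entry and run fence processing on the matching gfx or compute
 * ring.
 */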
6494 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6495                             struct amdgpu_irq_src *source,
6496                             struct amdgpu_iv_entry *entry)
6497 {
6498         int i;
6499         u8 me_id, pipe_id, queue_id;
6500         struct amdgpu_ring *ring;
6501
6502         DRM_DEBUG("IH: CP EOP\n");
6503         me_id = (entry->ring_id & 0x0c) >> 2;
6504         pipe_id = (entry->ring_id & 0x03) >> 0;
6505         queue_id = (entry->ring_id & 0x70) >> 4;
6506
6507         switch (me_id) {
6508         case 0:
6509                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6510                 break;
6511         case 1:
6512         case 2:
6513                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6514                         ring = &adev->gfx.compute_ring[i];
6515                         /* Per-queue interrupt is supported for MEC starting from VI.
6516                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6517                          */
6518                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6519                                 amdgpu_fence_process(ring);
6520                 }
6521                 break;
6522         }
6523         return 0;
6524 }
6525
6526 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6527                                  struct amdgpu_irq_src *source,
6528                                  struct amdgpu_iv_entry *entry)
6529 {
6530         DRM_ERROR("Illegal register access in command stream\n");
6531         schedule_work(&adev->reset_work);
6532         return 0;
6533 }
6534
6535 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6536                                   struct amdgpu_irq_src *source,
6537                                   struct amdgpu_iv_entry *entry)
6538 {
6539         DRM_ERROR("Illegal instruction in command stream\n");
6540         schedule_work(&adev->reset_work);
6541         return 0;
6542 }
6543
6544 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6545         .name = "gfx_v8_0",
6546         .early_init = gfx_v8_0_early_init,
6547         .late_init = gfx_v8_0_late_init,
6548         .sw_init = gfx_v8_0_sw_init,
6549         .sw_fini = gfx_v8_0_sw_fini,
6550         .hw_init = gfx_v8_0_hw_init,
6551         .hw_fini = gfx_v8_0_hw_fini,
6552         .suspend = gfx_v8_0_suspend,
6553         .resume = gfx_v8_0_resume,
6554         .is_idle = gfx_v8_0_is_idle,
6555         .wait_for_idle = gfx_v8_0_wait_for_idle,
6556         .check_soft_reset = gfx_v8_0_check_soft_reset,
6557         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6558         .soft_reset = gfx_v8_0_soft_reset,
6559         .post_soft_reset = gfx_v8_0_post_soft_reset,
6560         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6561         .set_powergating_state = gfx_v8_0_set_powergating_state,
6562 };
6563
6564 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6565         .get_rptr = gfx_v8_0_ring_get_rptr,
6566         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6567         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6568         .parse_cs = NULL,
6569         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6570         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6571         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6572         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6573         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6574         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6575         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6576         .test_ring = gfx_v8_0_ring_test_ring,
6577         .test_ib = gfx_v8_0_ring_test_ib,
6578         .insert_nop = amdgpu_ring_insert_nop,
6579         .pad_ib = amdgpu_ring_generic_pad_ib,
6580         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6581         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6582         .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
6583         .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
6584 };
6585
6586 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6587         .get_rptr = gfx_v8_0_ring_get_rptr,
6588         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6589         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6590         .parse_cs = NULL,
6591         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6592         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6593         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6594         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6595         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6596         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6597         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6598         .test_ring = gfx_v8_0_ring_test_ring,
6599         .test_ib = gfx_v8_0_ring_test_ib,
6600         .insert_nop = amdgpu_ring_insert_nop,
6601         .pad_ib = amdgpu_ring_generic_pad_ib,
6602         .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
6603         .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
6604 };
6605
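/* Attach the ring callback tables above to every gfx and compute ring. */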
6606 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6607 {
6608         int i;
6609
6610         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6611                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6612
6613         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6614                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6615 }
6616
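/* Interrupt source descriptors: .set enables/disables the source,
 * .process handles the delivered IV entries. */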
6617 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6618         .set = gfx_v8_0_set_eop_interrupt_state,
6619         .process = gfx_v8_0_eop_irq,
6620 };
6621
6622 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6623         .set = gfx_v8_0_set_priv_reg_fault_state,
6624         .process = gfx_v8_0_priv_reg_irq,
6625 };
6626
6627 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6628         .set = gfx_v8_0_set_priv_inst_fault_state,
6629         .process = gfx_v8_0_priv_inst_irq,
6630 };
6631
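/* Wire up the EOP and privileged-fault interrupt sources to the handlers above. */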
6632 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6633 {
6634         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6635         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6636
6637         adev->gfx.priv_reg_irq.num_types = 1;
6638         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6639
6640         adev->gfx.priv_inst_irq.num_types = 1;
6641         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6642 }
6643
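/* Pick the per-ASIC RLC callbacks; chips without a dedicated implementation
 * fall back to the nop variant. */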
6644 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6645 {
6646         switch (adev->asic_type) {
6647         case CHIP_TOPAZ:
6648                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6649                 break;
6650         case CHIP_STONEY:
6651         case CHIP_CARRIZO:
6652                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6653                 break;
6654         default:
6655                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6656                 break;
6657         }
6658 }
6659
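/*
 * Size the on-chip GDS, GWS and OA pools from GDS_VMID0_SIZE and split each
 * pool between the gfx and compute (CS) partitions.
 */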
6660 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6661 {
6662         /* init asic gds info */
6663         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6664         adev->gds.gws.total_size = 64;
6665         adev->gds.oa.total_size = 16;
6666
6667         if (adev->gds.mem.total_size == 64 * 1024) {
6668                 adev->gds.mem.gfx_partition_size = 4096;
6669                 adev->gds.mem.cs_partition_size = 4096;
6670
6671                 adev->gds.gws.gfx_partition_size = 4;
6672                 adev->gds.gws.cs_partition_size = 4;
6673
6674                 adev->gds.oa.gfx_partition_size = 4;
6675                 adev->gds.oa.cs_partition_size = 1;
6676         } else {
6677                 adev->gds.mem.gfx_partition_size = 1024;
6678                 adev->gds.mem.cs_partition_size = 1024;
6679
6680                 adev->gds.gws.gfx_partition_size = 16;
6681                 adev->gds.gws.cs_partition_size = 16;
6682
6683                 adev->gds.oa.gfx_partition_size = 4;
6684                 adev->gds.oa.cs_partition_size = 4;
6685         }
6686 }
6687
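/* Mark user-disabled CUs inactive in GC_USER_SHADER_ARRAY_CONFIG for the
 * currently selected SE/SH. */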
6688 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6689                                                  u32 bitmap)
6690 {
6691         u32 data;
6692
6693         if (!bitmap)
6694                 return;
6695
6696         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6697         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6698
6699         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6700 }
6701
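/* Return the bitmap of CUs that are active in the currently selected SE/SH,
 * i.e. not flagged inactive by hardware or by the user override, limited to
 * max_cu_per_sh. */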
6702 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6703 {
6704         u32 data, mask;
6705
6706         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6707                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6708
6709         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6710
6711         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6712 }
6713
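/*
 * Walk all shader engines/arrays, record each array's active CU bitmap and
 * the total active CU count, and collect the first two active CUs of every
 * array into the always-on (AO) CU mask.
 */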
6714 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6715 {
6716         int i, j, k, counter, active_cu_number = 0;
6717         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6718         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6719         unsigned disable_masks[4 * 2];
6720
6721         memset(cu_info, 0, sizeof(*cu_info));
6722
6723         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6724
6725         mutex_lock(&adev->grbm_idx_mutex);
6726         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6727                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6728                         mask = 1;
6729                         ao_bitmap = 0;
6730                         counter = 0;
6731                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6732                         if (i < 4 && j < 2)
6733                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6734                                         adev, disable_masks[i * 2 + j]);
6735                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6736                         cu_info->bitmap[i][j] = bitmap;
6737
6738                         for (k = 0; k < 16; k++) {
6739                                 if (bitmap & mask) {
6740                                         if (counter < 2)
6741                                                 ao_bitmap |= mask;
6742                                         counter++;
6743                                 }
6744                                 mask <<= 1;
6745                         }
6746                         active_cu_number += counter;
6747                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6748                 }
6749         }
6750         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6751         mutex_unlock(&adev->grbm_idx_mutex);
6752
6753         cu_info->number = active_cu_number;
6754         cu_info->ao_cu_mask = ao_cu_mask;
6755 }