drm/amdgpu/dce11: add dce clock setting for ELM/BAF
[cascardo/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48
/* Ring counts for GFX8: one graphics ring and eight compute rings. */
49 #define GFX8_NUM_GFX_RINGS     1
50 #define GFX8_NUM_COMPUTE_RINGS 8
51
/*
 * Per-ASIC GB_ADDR_CONFIG golden values. The Topaz/Carrizo value equals the
 * mmGB_ADDR_CONFIG entry in iceland_golden_common_all and
 * cz_golden_common_all; the Tonga value equals the one in
 * tonga_golden_common_all.
 */
52 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
53 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
55
/*
 * Field builders: each macro shifts x left by the corresponding
 * GB_TILE_MODE0 / GB_MACROTILE_MODE0 field shift so the results can be OR-ed
 * together. No masking is applied, so x must already fit in the field.
 */
56 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
57 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
58 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
59 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
60 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
61 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
62 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
63 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
64 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
65
/*
 * Single-bit masks (bits 0 through 5) named after the
 * RLC_CGTT_MGCG_OVERRIDE register; one bit each for CPF, RLC, MGCG, CGCG,
 * CGLS and GRBM.
 */
66 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
67 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
68 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
69 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
70 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
71 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
72
73 /* BPM serdes command codes: 1 selects "set", 0 selects "clear". */
74 #define SET_BPM_SERDES_CMD    1
75 #define CLE_BPM_SERDES_CMD    0
76
77 /* BPM register addresses, numbered consecutively from 0. */
78 enum {
79         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
80         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
81         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
82         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
83         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
84         BPM_REG_FGCG_MAX            /* Count of the addresses above (5), not an address itself */
85 };
86
/*
 * Firmware images this module may request, grouped by ASIC. Every ASIC lists
 * ce, pfp, me, mec and rlc images; Carrizo, Tonga and Fiji additionally list
 * a mec2 image, while Stoney and Topaz do not.
 */
87 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
88 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
95 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
99
100 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
101 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
108 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
114 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119
/*
 * GDS register table with 16 entries, one per VMID (0-15). Each entry lists
 * that VMID's GDS BASE, GDS SIZE, GWS and OA registers, in that order.
 * NOTE(review): the field names of struct amdgpu_gds_reg_offset are not
 * visible here; confirm the order matches the struct declaration.
 */
120 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
121 {
122         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
123         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
124         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
125         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
126         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
127         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
128         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
129         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
130         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
131         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
132         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
133         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
134         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
135         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
136         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
137         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
138 };
139
/*
 * Tonga golden settings. Passed to amdgpu_program_register_sequence() for
 * CHIP_TONGA by gfx_v8_0_init_golden_registers(), after tonga_mgcg_cgcg_init
 * and before tonga_golden_common_all.
 *
 * Entries are groups of three u32s: a register (mm*) followed by two values,
 * presumably an AND mask and the value to OR in -- confirm against
 * amdgpu_program_register_sequence().
 */
140 static const u32 golden_settings_tonga_a11[] =
141 {
142         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
143         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
144         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
145         mmGB_GPU_ID, 0x0000000f, 0x00000000,
146         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
147         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
148         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
149         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
150         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
151         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
152         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
153         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
154         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
155         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
156         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
157 };
158
/*
 * Tonga common settings; the last of the three tables programmed for
 * CHIP_TONGA by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values). The
 * mmGB_ADDR_CONFIG value equals TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
159 static const u32 tonga_golden_common_all[] =
160 {
161         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
162         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
163         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
164         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
165         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
166         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
167         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
168         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
169 };
170
/*
 * Tonga MGCG/CGCG init table; the first of the three tables programmed for
 * CHIP_TONGA by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values).
 *
 * Contents, in order: RLC_CGTT_MGCG_OVERRIDE, the CGTT_* / *_CGTT_* clock
 * control registers, the per-CU CGTS_CUn_* registers for CU0 through CU7,
 * then CGTS_SM_CTRL_REG, CP_RB_WPTR_POLL_CNTL, RLC_CGCG_CGLS_CTRL and
 * CP_MEM_SLP_CNTL. mmGRBM_GFX_INDEX is written before each of the two
 * register groups.
 */
171 static const u32 tonga_mgcg_cgcg_init[] =
172 {
173         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
174         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
175         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
176         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
177         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
178         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
179         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
180         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
181         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
182         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
183         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
184         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
185         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
186         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
187         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
188         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
189         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
190         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
191         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
192         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
193         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
194         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
195         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
196         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
198         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
199         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
200         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
201         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
202         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
203         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
204         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
205         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
206         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
207         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
208         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
209         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
210         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
211         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
212         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
213         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
214         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
215         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
216         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
217         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
218         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
219         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
220         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
221         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
222         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
223         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
224         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
225         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
226         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
227         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
228         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
229         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
230         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
231         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
232         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
233         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
234         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
237         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
242         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
245         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
246         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
247         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
248 };
249
/*
 * Fiji common settings; the last of the three tables programmed for
 * CHIP_FIJI by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values). Unlike the
 * other *_golden_common_all tables in this file it ends with a second
 * mmGRBM_GFX_INDEX write followed by mmSPI_CONFIG_CNTL_1.
 */
250 static const u32 fiji_golden_common_all[] =
251 {
252         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
253         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
254         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
255         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
256         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
257         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
258         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
259         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
260         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
262 };
263
/*
 * Fiji golden settings; the second of the three tables programmed for
 * CHIP_FIJI by gfx_v8_0_init_golden_registers(). Entries are groups of three
 * u32s: a register (mm*) followed by two values, presumably an AND mask and
 * the value to OR in -- confirm against amdgpu_program_register_sequence().
 */
264 static const u32 golden_settings_fiji_a10[] =
265 {
266         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
267         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
268         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
269         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
270         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
271         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
272         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
275         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
276         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
277 };
278
/*
 * Fiji MGCG/CGCG init table; the first of the three tables programmed for
 * CHIP_FIJI by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values).
 *
 * Unlike the Tonga, Iceland and Carrizo tables, this one has no per-CU
 * CGTS_CUn_* entries: the second mmGRBM_GFX_INDEX write is followed directly
 * by CGTS_SM_CTRL_REG.
 */
279 static const u32 fiji_mgcg_cgcg_init[] =
280 {
281         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
282         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
283         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
284         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
285         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
286         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
287         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
288         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
289         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
290         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
291         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
292         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
293         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
294         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
295         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
296         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
297         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
298         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
299         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
300         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
301         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
302         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
303         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
304         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
305         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
306         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
307         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
308         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
309         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
310         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
311         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
312         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
313         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
314         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
315         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
316 };
317
/*
 * Iceland golden settings; the second of the three tables programmed for
 * CHIP_TOPAZ by gfx_v8_0_init_golden_registers(). Entries are groups of
 * three u32s: a register (mm*) followed by two values, presumably an AND
 * mask and the value to OR in -- confirm against
 * amdgpu_program_register_sequence().
 */
318 static const u32 golden_settings_iceland_a11[] =
319 {
320         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
321         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
322         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
323         mmGB_GPU_ID, 0x0000000f, 0x00000000,
324         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
325         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
326         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
327         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
328         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
329         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
330         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
331         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
332         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
333         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
334         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
335 };
336
/*
 * Iceland common settings; the last of the three tables programmed for
 * CHIP_TOPAZ by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values). The
 * mmGB_ADDR_CONFIG value equals TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
337 static const u32 iceland_golden_common_all[] =
338 {
339         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
341         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
342         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
343         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
344         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
345         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
346         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
347 };
348
/*
 * Iceland MGCG/CGCG init table; the first of the three tables programmed for
 * CHIP_TOPAZ by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values).
 *
 * Differences from the Tonga/Carrizo tables visible in this file: per-CU
 * CGTS_CUn_* entries cover only CU0 through CU5, several values differ
 * (CGTT_CP/CPC/CPF_CLK_CTRL, CGTT_TCI_CLK_CTRL, CGTS_CU0/CU4_TA_SQC_CTRL_REG),
 * and there is no trailing mmCP_MEM_SLP_CNTL entry.
 */
349 static const u32 iceland_mgcg_cgcg_init[] =
350 {
351         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
352         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
354         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
355         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
356         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
357         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
358         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
359         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
360         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
361         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
362         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
363         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
364         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
365         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
366         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
367         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
369         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
370         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
371         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
372         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
373         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
374         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
376         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
377         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
378         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
379         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
380         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
381         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
383         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
384         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
385         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
386         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
387         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
388         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
389         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
390         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
391         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
392         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
393         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
394         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
395         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
396         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
397         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
398         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
399         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
400         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
401         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
402         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
403         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
404         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
405         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
406         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
407         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
408         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
409         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
410         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
411         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
412         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
413         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
414         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
415 };
416
/*
 * Carrizo golden settings; the second of the three tables programmed for
 * CHIP_CARRIZO by gfx_v8_0_init_golden_registers(). Entries are groups of
 * three u32s: a register (mm*) followed by two values, presumably an AND
 * mask and the value to OR in -- confirm against
 * amdgpu_program_register_sequence().
 *
 * NOTE(review): for mmTCP_ADDR_CONFIG the third column (0xf3) has bits set
 * outside the second column (0x0f); if the second column is a mask, check
 * whether that is intended.
 */
417 static const u32 cz_golden_settings_a11[] =
418 {
419         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
420         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421         mmGB_GPU_ID, 0x0000000f, 0x00000000,
422         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
423         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
424         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
426         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
428         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
429 };
430
/*
 * Carrizo common settings; the last of the three tables programmed for
 * CHIP_CARRIZO by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values). The
 * mmGB_ADDR_CONFIG value equals CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
431 static const u32 cz_golden_common_all[] =
432 {
433         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
435         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
436         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
437         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
438         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
439         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
440         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
441 };
442
/*
 * Carrizo MGCG/CGCG init table; the first of the three tables programmed for
 * CHIP_CARRIZO by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values).
 *
 * Covers the same registers as tonga_mgcg_cgcg_init (including CU0 through
 * CU7); the values that differ from the Tonga table are CGTT_CPF_CLK_CTRL
 * and RLC_CGCG_CGLS_CTRL.
 */
443 static const u32 cz_mgcg_cgcg_init[] =
444 {
445         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
446         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
447         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
454         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
455         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
456         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
457         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
458         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
460         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
462         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
463         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
464         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
465         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
466         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
467         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
468         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
469         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
470         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
471         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
472         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
473         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
475         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
476         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
489         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
497         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
498         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
499         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
500         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
501         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
502         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
503         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
504         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
505         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
506         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
507         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
508         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
509         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
510         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
511         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
512         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
513         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
514         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
515         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
516         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
517         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
518         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
519         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
520 };
521
/*
 * Stoney golden settings; the second of the three tables programmed for
 * CHIP_STONEY by gfx_v8_0_init_golden_registers(). Entries are groups of
 * three u32s: a register (mm*) followed by two values, presumably an AND
 * mask and the value to OR in -- confirm against
 * amdgpu_program_register_sequence().
 */
522 static const u32 stoney_golden_settings_a11[] =
523 {
524         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525         mmGB_GPU_ID, 0x0000000f, 0x00000000,
526         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
532         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535
/*
 * Stoney common settings; the last of the three tables programmed for
 * CHIP_STONEY by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values). Its
 * mmGB_ADDR_CONFIG value (0x12010001) differs from every other
 * *_golden_common_all table in this file.
 */
536 static const u32 stoney_golden_common_all[] =
537 {
538         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
539         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
540         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
541         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
542         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
543         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
544         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
545         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
546 };
547
/*
 * Stoney MGCG/CGCG init table; the first of the three tables programmed for
 * CHIP_STONEY by gfx_v8_0_init_golden_registers(). Same three-u32-per-entry
 * layout as the other golden tables (register, then two values).
 *
 * Much shorter than the other *_mgcg_cgcg_init tables: it has no
 * RLC_CGTT_MGCG_OVERRIDE, CGTT_* or per-CU CGTS_CUn_* entries, and it adds
 * RLC_MEM_SLP_CNTL and ATC_MISC_CG.
 */
548 static const u32 stoney_mgcg_cgcg_init[] =
549 {
550         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
551         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
552         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
553         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
554         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
555         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
556 };
557
/* Forward declarations of file-local setup helpers taking the device. */
558 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
559 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
560 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
562
563 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
564 {
565         switch (adev->asic_type) {
566         case CHIP_TOPAZ:
567                 amdgpu_program_register_sequence(adev,
568                                                  iceland_mgcg_cgcg_init,
569                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
570                 amdgpu_program_register_sequence(adev,
571                                                  golden_settings_iceland_a11,
572                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
573                 amdgpu_program_register_sequence(adev,
574                                                  iceland_golden_common_all,
575                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
576                 break;
577         case CHIP_FIJI:
578                 amdgpu_program_register_sequence(adev,
579                                                  fiji_mgcg_cgcg_init,
580                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
581                 amdgpu_program_register_sequence(adev,
582                                                  golden_settings_fiji_a10,
583                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
584                 amdgpu_program_register_sequence(adev,
585                                                  fiji_golden_common_all,
586                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
587                 break;
588
589         case CHIP_TONGA:
590                 amdgpu_program_register_sequence(adev,
591                                                  tonga_mgcg_cgcg_init,
592                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
593                 amdgpu_program_register_sequence(adev,
594                                                  golden_settings_tonga_a11,
595                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
596                 amdgpu_program_register_sequence(adev,
597                                                  tonga_golden_common_all,
598                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
599                 break;
600         case CHIP_CARRIZO:
601                 amdgpu_program_register_sequence(adev,
602                                                  cz_mgcg_cgcg_init,
603                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
604                 amdgpu_program_register_sequence(adev,
605                                                  cz_golden_settings_a11,
606                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
607                 amdgpu_program_register_sequence(adev,
608                                                  cz_golden_common_all,
609                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
610                 break;
611         case CHIP_STONEY:
612                 amdgpu_program_register_sequence(adev,
613                                                  stoney_mgcg_cgcg_init,
614                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
615                 amdgpu_program_register_sequence(adev,
616                                                  stoney_golden_settings_a11,
617                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
618                 amdgpu_program_register_sequence(adev,
619                                                  stoney_golden_common_all,
620                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
621                 break;
622         default:
623                 break;
624         }
625 }
626
627 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
628 {
629         int i;
630
631         adev->gfx.scratch.num_reg = 7;
632         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
633         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
634                 adev->gfx.scratch.free[i] = true;
635                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
636         }
637 }
638
639 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
640 {
641         struct amdgpu_device *adev = ring->adev;
642         uint32_t scratch;
643         uint32_t tmp = 0;
644         unsigned i;
645         int r;
646
647         r = amdgpu_gfx_scratch_get(adev, &scratch);
648         if (r) {
649                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
650                 return r;
651         }
652         WREG32(scratch, 0xCAFEDEAD);
653         r = amdgpu_ring_alloc(ring, 3);
654         if (r) {
655                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
656                           ring->idx, r);
657                 amdgpu_gfx_scratch_free(adev, scratch);
658                 return r;
659         }
660         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
661         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
662         amdgpu_ring_write(ring, 0xDEADBEEF);
663         amdgpu_ring_commit(ring);
664
665         for (i = 0; i < adev->usec_timeout; i++) {
666                 tmp = RREG32(scratch);
667                 if (tmp == 0xDEADBEEF)
668                         break;
669                 DRM_UDELAY(1);
670         }
671         if (i < adev->usec_timeout) {
672                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
673                          ring->idx, i);
674         } else {
675                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
676                           ring->idx, scratch, tmp);
677                 r = -EINVAL;
678         }
679         amdgpu_gfx_scratch_free(adev, scratch);
680         return r;
681 }
682
683 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
684 {
685         struct amdgpu_device *adev = ring->adev;
686         struct amdgpu_ib ib;
687         struct fence *f = NULL;
688         uint32_t scratch;
689         uint32_t tmp = 0;
690         unsigned i;
691         int r;
692
693         r = amdgpu_gfx_scratch_get(adev, &scratch);
694         if (r) {
695                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
696                 return r;
697         }
698         WREG32(scratch, 0xCAFEDEAD);
699         memset(&ib, 0, sizeof(ib));
700         r = amdgpu_ib_get(adev, NULL, 256, &ib);
701         if (r) {
702                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
703                 goto err1;
704         }
705         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
706         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
707         ib.ptr[2] = 0xDEADBEEF;
708         ib.length_dw = 3;
709
710         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
711         if (r)
712                 goto err2;
713
714         r = fence_wait(f, false);
715         if (r) {
716                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
717                 goto err2;
718         }
719         for (i = 0; i < adev->usec_timeout; i++) {
720                 tmp = RREG32(scratch);
721                 if (tmp == 0xDEADBEEF)
722                         break;
723                 DRM_UDELAY(1);
724         }
725         if (i < adev->usec_timeout) {
726                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
727                          ring->idx, i);
728                 goto err2;
729         } else {
730                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
731                           scratch, tmp);
732                 r = -EINVAL;
733         }
734 err2:
735         fence_put(f);
736         amdgpu_ib_free(adev, &ib, NULL);
737         fence_put(f);
738 err1:
739         amdgpu_gfx_scratch_free(adev, scratch);
740         return r;
741 }
742
743 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
744 {
745         const char *chip_name;
746         char fw_name[30];
747         int err;
748         struct amdgpu_firmware_info *info = NULL;
749         const struct common_firmware_header *header = NULL;
750         const struct gfx_firmware_header_v1_0 *cp_hdr;
751
752         DRM_DEBUG("\n");
753
754         switch (adev->asic_type) {
755         case CHIP_TOPAZ:
756                 chip_name = "topaz";
757                 break;
758         case CHIP_TONGA:
759                 chip_name = "tonga";
760                 break;
761         case CHIP_CARRIZO:
762                 chip_name = "carrizo";
763                 break;
764         case CHIP_FIJI:
765                 chip_name = "fiji";
766                 break;
767         case CHIP_STONEY:
768                 chip_name = "stoney";
769                 break;
770         default:
771                 BUG();
772         }
773
774         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
775         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
776         if (err)
777                 goto out;
778         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
779         if (err)
780                 goto out;
781         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
782         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
783         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
784
785         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
786         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
787         if (err)
788                 goto out;
789         err = amdgpu_ucode_validate(adev->gfx.me_fw);
790         if (err)
791                 goto out;
792         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
793         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
794         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
795
796         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
797         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
798         if (err)
799                 goto out;
800         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
801         if (err)
802                 goto out;
803         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
804         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
805         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
806
807         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
808         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
809         if (err)
810                 goto out;
811         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
812         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
813         adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
814         adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
815
816         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
817         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
818         if (err)
819                 goto out;
820         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
821         if (err)
822                 goto out;
823         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
824         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
825         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
826
827         if ((adev->asic_type != CHIP_STONEY) &&
828             (adev->asic_type != CHIP_TOPAZ)) {
829                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
830                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
831                 if (!err) {
832                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
833                         if (err)
834                                 goto out;
835                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
836                                 adev->gfx.mec2_fw->data;
837                         adev->gfx.mec2_fw_version =
838                                 le32_to_cpu(cp_hdr->header.ucode_version);
839                         adev->gfx.mec2_feature_version =
840                                 le32_to_cpu(cp_hdr->ucode_feature_version);
841                 } else {
842                         err = 0;
843                         adev->gfx.mec2_fw = NULL;
844                 }
845         }
846
847         if (adev->firmware.smu_load) {
848                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
849                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
850                 info->fw = adev->gfx.pfp_fw;
851                 header = (const struct common_firmware_header *)info->fw->data;
852                 adev->firmware.fw_size +=
853                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
854
855                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
856                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
857                 info->fw = adev->gfx.me_fw;
858                 header = (const struct common_firmware_header *)info->fw->data;
859                 adev->firmware.fw_size +=
860                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
861
862                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
863                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
864                 info->fw = adev->gfx.ce_fw;
865                 header = (const struct common_firmware_header *)info->fw->data;
866                 adev->firmware.fw_size +=
867                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
868
869                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
870                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
871                 info->fw = adev->gfx.rlc_fw;
872                 header = (const struct common_firmware_header *)info->fw->data;
873                 adev->firmware.fw_size +=
874                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
875
876                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
877                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
878                 info->fw = adev->gfx.mec_fw;
879                 header = (const struct common_firmware_header *)info->fw->data;
880                 adev->firmware.fw_size +=
881                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
882
883                 if (adev->gfx.mec2_fw) {
884                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
885                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
886                         info->fw = adev->gfx.mec2_fw;
887                         header = (const struct common_firmware_header *)info->fw->data;
888                         adev->firmware.fw_size +=
889                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
890                 }
891
892         }
893
894 out:
895         if (err) {
896                 dev_err(adev->dev,
897                         "gfx8: Failed to load firmware \"%s\"\n",
898                         fw_name);
899                 release_firmware(adev->gfx.pfp_fw);
900                 adev->gfx.pfp_fw = NULL;
901                 release_firmware(adev->gfx.me_fw);
902                 adev->gfx.me_fw = NULL;
903                 release_firmware(adev->gfx.ce_fw);
904                 adev->gfx.ce_fw = NULL;
905                 release_firmware(adev->gfx.rlc_fw);
906                 adev->gfx.rlc_fw = NULL;
907                 release_firmware(adev->gfx.mec_fw);
908                 adev->gfx.mec_fw = NULL;
909                 release_firmware(adev->gfx.mec2_fw);
910                 adev->gfx.mec2_fw = NULL;
911         }
912         return err;
913 }
914
915 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
916 {
917         int r;
918
919         if (adev->gfx.mec.hpd_eop_obj) {
920                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
921                 if (unlikely(r != 0))
922                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
923                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
924                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
925
926                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
927                 adev->gfx.mec.hpd_eop_obj = NULL;
928         }
929 }
930
931 #define MEC_HPD_SIZE 2048
932
933 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
934 {
935         int r;
936         u32 *hpd;
937
938         /*
939          * we assign only 1 pipe because all other pipes will
940          * be handled by KFD
941          */
942         adev->gfx.mec.num_mec = 1;
943         adev->gfx.mec.num_pipe = 1;
944         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
945
946         if (adev->gfx.mec.hpd_eop_obj == NULL) {
947                 r = amdgpu_bo_create(adev,
948                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
949                                      PAGE_SIZE, true,
950                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
951                                      &adev->gfx.mec.hpd_eop_obj);
952                 if (r) {
953                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
954                         return r;
955                 }
956         }
957
958         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
959         if (unlikely(r != 0)) {
960                 gfx_v8_0_mec_fini(adev);
961                 return r;
962         }
963         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
964                           &adev->gfx.mec.hpd_eop_gpu_addr);
965         if (r) {
966                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
967                 gfx_v8_0_mec_fini(adev);
968                 return r;
969         }
970         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
971         if (r) {
972                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
973                 gfx_v8_0_mec_fini(adev);
974                 return r;
975         }
976
977         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
978
979         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
980         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
981
982         return 0;
983 }
984
/*
 * Compute shader program, as raw dwords, for the VGPR pass of
 * gfx_v8_0_do_edc_gpr_workarounds().  That function copies the table into
 * its indirect buffer at vgpr_offset and points COMPUTE_PGM_LO/HI at it
 * for the first dispatch.
 */
static const u32 vgpr_init_compute_shader[] =
{
        0x7e000209, 0x7e020208,
        0x7e040207, 0x7e060206,
        0x7e080205, 0x7e0a0204,
        0x7e0c0203, 0x7e0e0202,
        0x7e100201, 0x7e120200,
        0x7e140209, 0x7e160208,
        0x7e180207, 0x7e1a0206,
        0x7e1c0205, 0x7e1e0204,
        0x7e200203, 0x7e220202,
        0x7e240201, 0x7e260200,
        0x7e280209, 0x7e2a0208,
        0x7e2c0207, 0x7e2e0206,
        0x7e300205, 0x7e320204,
        0x7e340203, 0x7e360202,
        0x7e380201, 0x7e3a0200,
        0x7e3c0209, 0x7e3e0208,
        0x7e400207, 0x7e420206,
        0x7e440205, 0x7e460204,
        0x7e480203, 0x7e4a0202,
        0x7e4c0201, 0x7e4e0200,
        0x7e500209, 0x7e520208,
        0x7e540207, 0x7e560206,
        0x7e580205, 0x7e5a0204,
        0x7e5c0203, 0x7e5e0202,
        0x7e600201, 0x7e620200,
        0x7e640209, 0x7e660208,
        0x7e680207, 0x7e6a0206,
        0x7e6c0205, 0x7e6e0204,
        0x7e700203, 0x7e720202,
        0x7e740201, 0x7e760200,
        0x7e780209, 0x7e7a0208,
        0x7e7c0207, 0x7e7e0206,
        0xbf8a0000, 0xbf810000,
};
1021
/*
 * Compute shader program, as raw dwords, for the SGPR passes of
 * gfx_v8_0_do_edc_gpr_workarounds().  That function copies the table into
 * its indirect buffer at sgpr_offset and uses it as the program for both
 * the SGPR1 and the SGPR2 dispatch.
 */
static const u32 sgpr_init_compute_shader[] =
{
        0xbe8a0100, 0xbe8c0102,
        0xbe8e0104, 0xbe900106,
        0xbe920108, 0xbe940100,
        0xbe960102, 0xbe980104,
        0xbe9a0106, 0xbe9c0108,
        0xbe9e0100, 0xbea00102,
        0xbea20104, 0xbea40106,
        0xbea60108, 0xbea80100,
        0xbeaa0102, 0xbeac0104,
        0xbeae0106, 0xbeb00108,
        0xbeb20100, 0xbeb40102,
        0xbeb60104, 0xbeb80106,
        0xbeba0108, 0xbebc0100,
        0xbebe0102, 0xbec00104,
        0xbec20106, 0xbec40108,
        0xbec60100, 0xbec80102,
        0xbee60004, 0xbee70005,
        0xbeea0006, 0xbeeb0007,
        0xbee80008, 0xbee90009,
        0xbefc0000, 0xbf8a0000,
        0xbf810000, 0x00000000,
};
1046
/*
 * Register state for the VGPR dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(), stored as flat (register, value)
 * pairs.  Each pair is emitted there as one SET_SH_REG packet.
 */
static const u32 vgpr_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
        mmCOMPUTE_RESOURCE_LIMITS, 0,
        mmCOMPUTE_NUM_THREAD_X, 256*4,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1066
/*
 * Register state for the first SGPR dispatch (SGPR1) in
 * gfx_v8_0_do_edc_gpr_workarounds(), stored as flat (register, value)
 * pairs.  It differs from sgpr2_init_regs only in the
 * COMPUTE_STATIC_THREAD_MGMT_SE0 value (0x0f here).
 */
static const u32 sgpr1_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1086
/*
 * Register state for the second SGPR dispatch (SGPR2) in
 * gfx_v8_0_do_edc_gpr_workarounds(), stored as flat (register, value)
 * pairs.  It differs from sgpr1_init_regs only in the
 * COMPUTE_STATIC_THREAD_MGMT_SE0 value (0xf0 here).
 */
static const u32 sgpr2_init_regs[] =
{
        mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
        mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
        mmCOMPUTE_NUM_THREAD_X, 256*5,
        mmCOMPUTE_NUM_THREAD_Y, 1,
        mmCOMPUTE_NUM_THREAD_Z, 1,
        mmCOMPUTE_PGM_RSRC2, 20,
        mmCOMPUTE_USER_DATA_0, 0xedcedc00,
        mmCOMPUTE_USER_DATA_1, 0xedcedc01,
        mmCOMPUTE_USER_DATA_2, 0xedcedc02,
        mmCOMPUTE_USER_DATA_3, 0xedcedc03,
        mmCOMPUTE_USER_DATA_4, 0xedcedc04,
        mmCOMPUTE_USER_DATA_5, 0xedcedc05,
        mmCOMPUTE_USER_DATA_6, 0xedcedc06,
        mmCOMPUTE_USER_DATA_7, 0xedcedc07,
        mmCOMPUTE_USER_DATA_8, 0xedcedc08,
        mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1106
/*
 * EDC counter registers that gfx_v8_0_do_edc_gpr_workarounds() reads back,
 * discarding the values, as its final step; the comment at that loop says
 * the read-back is what clears the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
        mmCPC_EDC_ATC_CNT,
        mmCPC_EDC_SCRATCH_CNT,
        mmCPC_EDC_UCODE_CNT,
        mmCPF_EDC_ATC_CNT,
        mmCPF_EDC_ROQ_CNT,
        mmCPF_EDC_TAG_CNT,
        mmCPG_EDC_ATC_CNT,
        mmCPG_EDC_DMA_CNT,
        mmCPG_EDC_TAG_CNT,
        mmDC_EDC_CSINVOC_CNT,
        mmDC_EDC_RESTORE_CNT,
        mmDC_EDC_STATE_CNT,
        mmGDS_EDC_CNT,
        mmGDS_EDC_GRBM_CNT,
        mmGDS_EDC_OA_DED,
        mmSPI_EDC_CNT,
        mmSQC_ATC_EDC_GATCL1_CNT,
        mmSQC_EDC_CNT,
        mmSQ_EDC_DED_CNT,
        mmSQ_EDC_INFO,
        mmSQ_EDC_SEC_CNT,
        mmTCC_EDC_CNT,
        mmTCP_ATC_EDC_GATCL1_CNT,
        mmTCP_EDC_CNT,
        mmTD_EDC_CNT
};
1135
1136 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1137 {
1138         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1139         struct amdgpu_ib ib;
1140         struct fence *f = NULL;
1141         int r, i;
1142         u32 tmp;
1143         unsigned total_size, vgpr_offset, sgpr_offset;
1144         u64 gpu_addr;
1145
1146         /* only supported on CZ */
1147         if (adev->asic_type != CHIP_CARRIZO)
1148                 return 0;
1149
1150         /* bail if the compute ring is not ready */
1151         if (!ring->ready)
1152                 return 0;
1153
1154         tmp = RREG32(mmGB_EDC_MODE);
1155         WREG32(mmGB_EDC_MODE, 0);
1156
1157         total_size =
1158                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1159         total_size +=
1160                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1161         total_size +=
1162                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1163         total_size = ALIGN(total_size, 256);
1164         vgpr_offset = total_size;
1165         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1166         sgpr_offset = total_size;
1167         total_size += sizeof(sgpr_init_compute_shader);
1168
1169         /* allocate an indirect buffer to put the commands in */
1170         memset(&ib, 0, sizeof(ib));
1171         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1172         if (r) {
1173                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1174                 return r;
1175         }
1176
1177         /* load the compute shaders */
1178         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1179                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1180
1181         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1182                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1183
1184         /* init the ib length to 0 */
1185         ib.length_dw = 0;
1186
1187         /* VGPR */
1188         /* write the register state for the compute dispatch */
1189         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1190                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1191                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1192                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1193         }
1194         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1195         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1196         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1197         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1198         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1199         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1200
1201         /* write dispatch packet */
1202         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1203         ib.ptr[ib.length_dw++] = 8; /* x */
1204         ib.ptr[ib.length_dw++] = 1; /* y */
1205         ib.ptr[ib.length_dw++] = 1; /* z */
1206         ib.ptr[ib.length_dw++] =
1207                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1208
1209         /* write CS partial flush packet */
1210         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1211         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1212
1213         /* SGPR1 */
1214         /* write the register state for the compute dispatch */
1215         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1216                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1217                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1218                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1219         }
1220         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1221         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1222         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1223         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1224         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1225         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1226
1227         /* write dispatch packet */
1228         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1229         ib.ptr[ib.length_dw++] = 8; /* x */
1230         ib.ptr[ib.length_dw++] = 1; /* y */
1231         ib.ptr[ib.length_dw++] = 1; /* z */
1232         ib.ptr[ib.length_dw++] =
1233                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1234
1235         /* write CS partial flush packet */
1236         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1237         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1238
1239         /* SGPR2 */
1240         /* write the register state for the compute dispatch */
1241         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1242                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1243                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1244                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1245         }
1246         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1247         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1248         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1249         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1250         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1251         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1252
1253         /* write dispatch packet */
1254         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1255         ib.ptr[ib.length_dw++] = 8; /* x */
1256         ib.ptr[ib.length_dw++] = 1; /* y */
1257         ib.ptr[ib.length_dw++] = 1; /* z */
1258         ib.ptr[ib.length_dw++] =
1259                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1260
1261         /* write CS partial flush packet */
1262         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1263         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1264
1265         /* shedule the ib on the ring */
1266         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1267         if (r) {
1268                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1269                 goto fail;
1270         }
1271
1272         /* wait for the GPU to finish processing the IB */
1273         r = fence_wait(f, false);
1274         if (r) {
1275                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1276                 goto fail;
1277         }
1278
1279         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1280         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1281         WREG32(mmGB_EDC_MODE, tmp);
1282
1283         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1284         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1285         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1286
1287
1288         /* read back registers to clear the counters */
1289         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1290                 RREG32(sec_ded_counter_registers[i]);
1291
1292 fail:
1293         fence_put(f);
1294         amdgpu_ib_free(adev, &ib, NULL);
1295         fence_put(f);
1296
1297         return r;
1298 }
1299
1300 static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1301 {
1302         u32 gb_addr_config;
1303         u32 mc_shared_chmap, mc_arb_ramcfg;
1304         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1305         u32 tmp;
1306
1307         switch (adev->asic_type) {
1308         case CHIP_TOPAZ:
1309                 adev->gfx.config.max_shader_engines = 1;
1310                 adev->gfx.config.max_tile_pipes = 2;
1311                 adev->gfx.config.max_cu_per_sh = 6;
1312                 adev->gfx.config.max_sh_per_se = 1;
1313                 adev->gfx.config.max_backends_per_se = 2;
1314                 adev->gfx.config.max_texture_channel_caches = 2;
1315                 adev->gfx.config.max_gprs = 256;
1316                 adev->gfx.config.max_gs_threads = 32;
1317                 adev->gfx.config.max_hw_contexts = 8;
1318
1319                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1320                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1321                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1322                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1323                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1324                 break;
1325         case CHIP_FIJI:
1326                 adev->gfx.config.max_shader_engines = 4;
1327                 adev->gfx.config.max_tile_pipes = 16;
1328                 adev->gfx.config.max_cu_per_sh = 16;
1329                 adev->gfx.config.max_sh_per_se = 1;
1330                 adev->gfx.config.max_backends_per_se = 4;
1331                 adev->gfx.config.max_texture_channel_caches = 16;
1332                 adev->gfx.config.max_gprs = 256;
1333                 adev->gfx.config.max_gs_threads = 32;
1334                 adev->gfx.config.max_hw_contexts = 8;
1335
1336                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1337                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1338                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1339                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1340                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1341                 break;
1342         case CHIP_TONGA:
1343                 adev->gfx.config.max_shader_engines = 4;
1344                 adev->gfx.config.max_tile_pipes = 8;
1345                 adev->gfx.config.max_cu_per_sh = 8;
1346                 adev->gfx.config.max_sh_per_se = 1;
1347                 adev->gfx.config.max_backends_per_se = 2;
1348                 adev->gfx.config.max_texture_channel_caches = 8;
1349                 adev->gfx.config.max_gprs = 256;
1350                 adev->gfx.config.max_gs_threads = 32;
1351                 adev->gfx.config.max_hw_contexts = 8;
1352
1353                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1354                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1355                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1356                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1357                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1358                 break;
1359         case CHIP_CARRIZO:
1360                 adev->gfx.config.max_shader_engines = 1;
1361                 adev->gfx.config.max_tile_pipes = 2;
1362                 adev->gfx.config.max_sh_per_se = 1;
1363                 adev->gfx.config.max_backends_per_se = 2;
1364
1365                 switch (adev->pdev->revision) {
1366                 case 0xc4:
1367                 case 0x84:
1368                 case 0xc8:
1369                 case 0xcc:
1370                 case 0xe1:
1371                 case 0xe3:
1372                         /* B10 */
1373                         adev->gfx.config.max_cu_per_sh = 8;
1374                         break;
1375                 case 0xc5:
1376                 case 0x81:
1377                 case 0x85:
1378                 case 0xc9:
1379                 case 0xcd:
1380                 case 0xe2:
1381                 case 0xe4:
1382                         /* B8 */
1383                         adev->gfx.config.max_cu_per_sh = 6;
1384                         break;
1385                 case 0xc6:
1386                 case 0xca:
1387                 case 0xce:
1388                 case 0x88:
1389                         /* B6 */
1390                         adev->gfx.config.max_cu_per_sh = 6;
1391                         break;
1392                 case 0xc7:
1393                 case 0x87:
1394                 case 0xcb:
1395                 case 0xe5:
1396                 case 0x89:
1397                 default:
1398                         /* B4 */
1399                         adev->gfx.config.max_cu_per_sh = 4;
1400                         break;
1401                 }
1402
1403                 adev->gfx.config.max_texture_channel_caches = 2;
1404                 adev->gfx.config.max_gprs = 256;
1405                 adev->gfx.config.max_gs_threads = 32;
1406                 adev->gfx.config.max_hw_contexts = 8;
1407
1408                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1409                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1410                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1411                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1412                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1413                 break;
1414         case CHIP_STONEY:
1415                 adev->gfx.config.max_shader_engines = 1;
1416                 adev->gfx.config.max_tile_pipes = 2;
1417                 adev->gfx.config.max_sh_per_se = 1;
1418                 adev->gfx.config.max_backends_per_se = 1;
1419
1420                 switch (adev->pdev->revision) {
1421                 case 0xc0:
1422                 case 0xc1:
1423                 case 0xc2:
1424                 case 0xc4:
1425                 case 0xc8:
1426                 case 0xc9:
1427                         adev->gfx.config.max_cu_per_sh = 3;
1428                         break;
1429                 case 0xd0:
1430                 case 0xd1:
1431                 case 0xd2:
1432                 default:
1433                         adev->gfx.config.max_cu_per_sh = 2;
1434                         break;
1435                 }
1436
1437                 adev->gfx.config.max_texture_channel_caches = 2;
1438                 adev->gfx.config.max_gprs = 256;
1439                 adev->gfx.config.max_gs_threads = 16;
1440                 adev->gfx.config.max_hw_contexts = 8;
1441
1442                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1443                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1444                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1445                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1446                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1447                 break;
1448         default:
1449                 adev->gfx.config.max_shader_engines = 2;
1450                 adev->gfx.config.max_tile_pipes = 4;
1451                 adev->gfx.config.max_cu_per_sh = 2;
1452                 adev->gfx.config.max_sh_per_se = 1;
1453                 adev->gfx.config.max_backends_per_se = 2;
1454                 adev->gfx.config.max_texture_channel_caches = 4;
1455                 adev->gfx.config.max_gprs = 256;
1456                 adev->gfx.config.max_gs_threads = 32;
1457                 adev->gfx.config.max_hw_contexts = 8;
1458
1459                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1460                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1461                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1462                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1463                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1464                 break;
1465         }
1466
1467         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1468         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1469         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1470
1471         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1472         adev->gfx.config.mem_max_burst_length_bytes = 256;
1473         if (adev->flags & AMD_IS_APU) {
1474                 /* Get memory bank mapping mode. */
1475                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1476                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1477                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1478
1479                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1480                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1481                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1482
1483                 /* Validate settings in case only one DIMM installed. */
1484                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1485                         dimm00_addr_map = 0;
1486                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1487                         dimm01_addr_map = 0;
1488                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1489                         dimm10_addr_map = 0;
1490                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1491                         dimm11_addr_map = 0;
1492
1493                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1494                 /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
1495                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1496                         adev->gfx.config.mem_row_size_in_kb = 2;
1497                 else
1498                         adev->gfx.config.mem_row_size_in_kb = 1;
1499         } else {
1500                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1501                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1502                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1503                         adev->gfx.config.mem_row_size_in_kb = 4;
1504         }
1505
1506         adev->gfx.config.shader_engine_tile_size = 32;
1507         adev->gfx.config.num_gpus = 1;
1508         adev->gfx.config.multi_gpu_tile_size = 64;
1509
1510         /* fix up row size */
1511         switch (adev->gfx.config.mem_row_size_in_kb) {
1512         case 1:
1513         default:
1514                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1515                 break;
1516         case 2:
1517                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1518                 break;
1519         case 4:
1520                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1521                 break;
1522         }
1523         adev->gfx.config.gb_addr_config = gb_addr_config;
1524 }
1525
1526 static int gfx_v8_0_sw_init(void *handle)
1527 {
1528         int i, r;
1529         struct amdgpu_ring *ring;
1530         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1531
1532         /* EOP Event */
1533         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1534         if (r)
1535                 return r;
1536
1537         /* Privileged reg */
1538         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1539         if (r)
1540                 return r;
1541
1542         /* Privileged inst */
1543         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1544         if (r)
1545                 return r;
1546
1547         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1548
1549         gfx_v8_0_scratch_init(adev);
1550
1551         r = gfx_v8_0_init_microcode(adev);
1552         if (r) {
1553                 DRM_ERROR("Failed to load gfx firmware!\n");
1554                 return r;
1555         }
1556
1557         r = gfx_v8_0_mec_init(adev);
1558         if (r) {
1559                 DRM_ERROR("Failed to init MEC BOs!\n");
1560                 return r;
1561         }
1562
1563         /* set up the gfx ring */
1564         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1565                 ring = &adev->gfx.gfx_ring[i];
1566                 ring->ring_obj = NULL;
1567                 sprintf(ring->name, "gfx");
1568                 /* no gfx doorbells on iceland */
1569                 if (adev->asic_type != CHIP_TOPAZ) {
1570                         ring->use_doorbell = true;
1571                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1572                 }
1573
1574                 r = amdgpu_ring_init(adev, ring, 1024,
1575                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1576                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1577                                      AMDGPU_RING_TYPE_GFX);
1578                 if (r)
1579                         return r;
1580         }
1581
1582         /* set up the compute queues */
1583         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1584                 unsigned irq_type;
1585
1586                 /* max 32 queues per MEC */
1587                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1588                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1589                         break;
1590                 }
1591                 ring = &adev->gfx.compute_ring[i];
1592                 ring->ring_obj = NULL;
1593                 ring->use_doorbell = true;
1594                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1595                 ring->me = 1; /* first MEC */
1596                 ring->pipe = i / 8;
1597                 ring->queue = i % 8;
1598                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1599                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1600                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1601                 r = amdgpu_ring_init(adev, ring, 1024,
1602                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1603                                      &adev->gfx.eop_irq, irq_type,
1604                                      AMDGPU_RING_TYPE_COMPUTE);
1605                 if (r)
1606                         return r;
1607         }
1608
1609         /* reserve GDS, GWS and OA resource for gfx */
1610         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1611                         PAGE_SIZE, true,
1612                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1613                         NULL, &adev->gds.gds_gfx_bo);
1614         if (r)
1615                 return r;
1616
1617         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1618                 PAGE_SIZE, true,
1619                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1620                 NULL, &adev->gds.gws_gfx_bo);
1621         if (r)
1622                 return r;
1623
1624         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1625                         PAGE_SIZE, true,
1626                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1627                         NULL, &adev->gds.oa_gfx_bo);
1628         if (r)
1629                 return r;
1630
1631         adev->gfx.ce_ram_size = 0x8000;
1632
1633         gfx_v8_0_gpu_early_init(adev);
1634
1635         return 0;
1636 }
1637
1638 static int gfx_v8_0_sw_fini(void *handle)
1639 {
1640         int i;
1641         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1642
1643         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1644         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1645         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1646
1647         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1648                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1649         for (i = 0; i < adev->gfx.num_compute_rings; i++)
1650                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1651
1652         gfx_v8_0_mec_fini(adev);
1653
1654         return 0;
1655 }
1656
1657 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1658 {
1659         uint32_t *modearray, *mod2array;
1660         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1661         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1662         u32 reg_offset;
1663
1664         modearray = adev->gfx.config.tile_mode_array;
1665         mod2array = adev->gfx.config.macrotile_mode_array;
1666
1667         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1668                 modearray[reg_offset] = 0;
1669
1670         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
1671                 mod2array[reg_offset] = 0;
1672
1673         switch (adev->asic_type) {
1674         case CHIP_TOPAZ:
1675                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1676                                 PIPE_CONFIG(ADDR_SURF_P2) |
1677                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1678                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1679                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680                                 PIPE_CONFIG(ADDR_SURF_P2) |
1681                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1682                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684                                 PIPE_CONFIG(ADDR_SURF_P2) |
1685                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1686                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688                                 PIPE_CONFIG(ADDR_SURF_P2) |
1689                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1690                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692                                 PIPE_CONFIG(ADDR_SURF_P2) |
1693                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1694                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1696                                 PIPE_CONFIG(ADDR_SURF_P2) |
1697                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1700                                 PIPE_CONFIG(ADDR_SURF_P2) |
1701                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1704                                 PIPE_CONFIG(ADDR_SURF_P2));
1705                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1706                                 PIPE_CONFIG(ADDR_SURF_P2) |
1707                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1708                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1709                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1710                                  PIPE_CONFIG(ADDR_SURF_P2) |
1711                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1714                                  PIPE_CONFIG(ADDR_SURF_P2) |
1715                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1717                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1718                                  PIPE_CONFIG(ADDR_SURF_P2) |
1719                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1720                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1721                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1722                                  PIPE_CONFIG(ADDR_SURF_P2) |
1723                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1726                                  PIPE_CONFIG(ADDR_SURF_P2) |
1727                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1730                                  PIPE_CONFIG(ADDR_SURF_P2) |
1731                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1733                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1734                                  PIPE_CONFIG(ADDR_SURF_P2) |
1735                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1737                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738                                  PIPE_CONFIG(ADDR_SURF_P2) |
1739                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1740                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1742                                  PIPE_CONFIG(ADDR_SURF_P2) |
1743                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1746                                  PIPE_CONFIG(ADDR_SURF_P2) |
1747                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1750                                  PIPE_CONFIG(ADDR_SURF_P2) |
1751                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1754                                  PIPE_CONFIG(ADDR_SURF_P2) |
1755                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1756                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1758                                  PIPE_CONFIG(ADDR_SURF_P2) |
1759                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1760                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1762                                  PIPE_CONFIG(ADDR_SURF_P2) |
1763                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1766                                  PIPE_CONFIG(ADDR_SURF_P2) |
1767                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1768                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1769                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770                                  PIPE_CONFIG(ADDR_SURF_P2) |
1771                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1774                                  PIPE_CONFIG(ADDR_SURF_P2) |
1775                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1777
1778                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1779                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1780                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1781                                 NUM_BANKS(ADDR_SURF_8_BANK));
1782                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785                                 NUM_BANKS(ADDR_SURF_8_BANK));
1786                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1787                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789                                 NUM_BANKS(ADDR_SURF_8_BANK));
1790                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1791                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1793                                 NUM_BANKS(ADDR_SURF_8_BANK));
1794                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1796                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1797                                 NUM_BANKS(ADDR_SURF_8_BANK));
1798                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1800                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801                                 NUM_BANKS(ADDR_SURF_8_BANK));
1802                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805                                 NUM_BANKS(ADDR_SURF_8_BANK));
1806                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1807                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1808                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1809                                 NUM_BANKS(ADDR_SURF_16_BANK));
1810                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813                                 NUM_BANKS(ADDR_SURF_16_BANK));
1814                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1815                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817                                  NUM_BANKS(ADDR_SURF_16_BANK));
1818                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1820                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821                                  NUM_BANKS(ADDR_SURF_16_BANK));
1822                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1823                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825                                  NUM_BANKS(ADDR_SURF_16_BANK));
1826                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1828                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829                                  NUM_BANKS(ADDR_SURF_16_BANK));
1830                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1833                                  NUM_BANKS(ADDR_SURF_8_BANK));
1834
1835                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1836                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1837                             reg_offset != 23)
1838                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1839
1840                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1841                         if (reg_offset != 7)
1842                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1843
1844                 break;
1845         case CHIP_FIJI:
1846                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1847                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1848                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1849                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1850                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1853                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1857                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1861                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1865                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1867                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1871                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1876                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1879                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1880                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1881                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1884                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1885                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1889                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1892                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1894                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1897                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1898                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1899                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1900                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1901                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1905                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1909                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1912                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1914                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1917                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1918                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1920                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1923                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1925                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1929                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1933                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1938                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1941                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1942                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1943                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1945                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1947                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1949                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1953                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1955                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1956                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1957                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1961                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1964                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1966                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968
1969                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1971                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1972                                 NUM_BANKS(ADDR_SURF_8_BANK));
1973                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976                                 NUM_BANKS(ADDR_SURF_8_BANK));
1977                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980                                 NUM_BANKS(ADDR_SURF_8_BANK));
1981                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984                                 NUM_BANKS(ADDR_SURF_8_BANK));
1985                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1987                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1988                                 NUM_BANKS(ADDR_SURF_8_BANK));
1989                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1991                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992                                 NUM_BANKS(ADDR_SURF_8_BANK));
1993                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996                                 NUM_BANKS(ADDR_SURF_8_BANK));
1997                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1999                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2000                                 NUM_BANKS(ADDR_SURF_8_BANK));
2001                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004                                 NUM_BANKS(ADDR_SURF_8_BANK));
2005                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2007                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2008                                  NUM_BANKS(ADDR_SURF_8_BANK));
2009                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2011                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012                                  NUM_BANKS(ADDR_SURF_8_BANK));
2013                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2015                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2016                                  NUM_BANKS(ADDR_SURF_8_BANK));
2017                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2019                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020                                  NUM_BANKS(ADDR_SURF_8_BANK));
2021                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2024                                  NUM_BANKS(ADDR_SURF_4_BANK));
2025
2026                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2027                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2028
2029                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2030                         if (reg_offset != 7)
2031                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2032
2033                 break;
2034         case CHIP_TONGA:
2035                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2036                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2037                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2038                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2039                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2042                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2046                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2050                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2054                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2056                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2060                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2065                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2068                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2069                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2070                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2071                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2072                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2074                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2078                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2081                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2083                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2087                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2088                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2094                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2098                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2101                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2103                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2106                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2107                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2109                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2112                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2114                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2118                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2122                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2127                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2130                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2131                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2132                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2134                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2136                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2138                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2142                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2150                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2153                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157
2158                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2160                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2161                                 NUM_BANKS(ADDR_SURF_16_BANK));
2162                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165                                 NUM_BANKS(ADDR_SURF_16_BANK));
2166                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169                                 NUM_BANKS(ADDR_SURF_16_BANK));
2170                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173                                 NUM_BANKS(ADDR_SURF_16_BANK));
2174                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2176                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2177                                 NUM_BANKS(ADDR_SURF_16_BANK));
2178                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2180                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2181                                 NUM_BANKS(ADDR_SURF_16_BANK));
2182                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185                                 NUM_BANKS(ADDR_SURF_16_BANK));
2186                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2188                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2189                                 NUM_BANKS(ADDR_SURF_16_BANK));
2190                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2192                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193                                 NUM_BANKS(ADDR_SURF_16_BANK));
2194                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2196                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2197                                  NUM_BANKS(ADDR_SURF_16_BANK));
2198                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2200                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201                                  NUM_BANKS(ADDR_SURF_16_BANK));
2202                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2205                                  NUM_BANKS(ADDR_SURF_8_BANK));
2206                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209                                  NUM_BANKS(ADDR_SURF_4_BANK));
2210                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213                                  NUM_BANKS(ADDR_SURF_4_BANK));
2214
2215                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2216                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2217
2218                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2219                         if (reg_offset != 7)
2220                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2221
2222                 break;
2223         case CHIP_STONEY:
2224                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225                                 PIPE_CONFIG(ADDR_SURF_P2) |
2226                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2227                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229                                 PIPE_CONFIG(ADDR_SURF_P2) |
2230                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2231                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P2) |
2234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                 PIPE_CONFIG(ADDR_SURF_P2) |
2238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241                                 PIPE_CONFIG(ADDR_SURF_P2) |
2242                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2243                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2245                                 PIPE_CONFIG(ADDR_SURF_P2) |
2246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249                                 PIPE_CONFIG(ADDR_SURF_P2) |
2250                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2253                                 PIPE_CONFIG(ADDR_SURF_P2));
2254                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255                                 PIPE_CONFIG(ADDR_SURF_P2) |
2256                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2257                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259                                  PIPE_CONFIG(ADDR_SURF_P2) |
2260                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263                                  PIPE_CONFIG(ADDR_SURF_P2) |
2264                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2266                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P2) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P2) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2275                                  PIPE_CONFIG(ADDR_SURF_P2) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279                                  PIPE_CONFIG(ADDR_SURF_P2) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2282                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2283                                  PIPE_CONFIG(ADDR_SURF_P2) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287                                  PIPE_CONFIG(ADDR_SURF_P2) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P2) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P2) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P2) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P2) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2307                                  PIPE_CONFIG(ADDR_SURF_P2) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2311                                  PIPE_CONFIG(ADDR_SURF_P2) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2315                                  PIPE_CONFIG(ADDR_SURF_P2) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319                                  PIPE_CONFIG(ADDR_SURF_P2) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P2) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326
2327                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2330                                 NUM_BANKS(ADDR_SURF_8_BANK));
2331                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334                                 NUM_BANKS(ADDR_SURF_8_BANK));
2335                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2358                                 NUM_BANKS(ADDR_SURF_16_BANK));
2359                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362                                 NUM_BANKS(ADDR_SURF_16_BANK));
2363                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2364                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366                                  NUM_BANKS(ADDR_SURF_16_BANK));
2367                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370                                  NUM_BANKS(ADDR_SURF_16_BANK));
2371                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374                                  NUM_BANKS(ADDR_SURF_16_BANK));
2375                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378                                  NUM_BANKS(ADDR_SURF_16_BANK));
2379                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382                                  NUM_BANKS(ADDR_SURF_8_BANK));
2383
2384                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2386                             reg_offset != 23)
2387                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2388
2389                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390                         if (reg_offset != 7)
2391                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2392
2393                 break;
2394         default:
2395                 dev_warn(adev->dev,
2396                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2397                          adev->asic_type);
2398
2399         case CHIP_CARRIZO:
2400                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                                 PIPE_CONFIG(ADDR_SURF_P2) |
2402                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2403                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                 PIPE_CONFIG(ADDR_SURF_P2) |
2406                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                 PIPE_CONFIG(ADDR_SURF_P2) |
2410                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2411                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                 PIPE_CONFIG(ADDR_SURF_P2) |
2414                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2415                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P2) |
2418                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2419                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P2) |
2422                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P2) |
2426                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2429                                 PIPE_CONFIG(ADDR_SURF_P2));
2430                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2431                                 PIPE_CONFIG(ADDR_SURF_P2) |
2432                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2433                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435                                  PIPE_CONFIG(ADDR_SURF_P2) |
2436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439                                  PIPE_CONFIG(ADDR_SURF_P2) |
2440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443                                  PIPE_CONFIG(ADDR_SURF_P2) |
2444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447                                  PIPE_CONFIG(ADDR_SURF_P2) |
2448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2451                                  PIPE_CONFIG(ADDR_SURF_P2) |
2452                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455                                  PIPE_CONFIG(ADDR_SURF_P2) |
2456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2458                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2459                                  PIPE_CONFIG(ADDR_SURF_P2) |
2460                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2462                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463                                  PIPE_CONFIG(ADDR_SURF_P2) |
2464                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2465                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2467                                  PIPE_CONFIG(ADDR_SURF_P2) |
2468                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2471                                  PIPE_CONFIG(ADDR_SURF_P2) |
2472                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2475                                  PIPE_CONFIG(ADDR_SURF_P2) |
2476                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2479                                  PIPE_CONFIG(ADDR_SURF_P2) |
2480                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2483                                  PIPE_CONFIG(ADDR_SURF_P2) |
2484                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2485                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2487                                  PIPE_CONFIG(ADDR_SURF_P2) |
2488                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2491                                  PIPE_CONFIG(ADDR_SURF_P2) |
2492                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2493                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2494                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495                                  PIPE_CONFIG(ADDR_SURF_P2) |
2496                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499                                  PIPE_CONFIG(ADDR_SURF_P2) |
2500                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2502
2503                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2506                                 NUM_BANKS(ADDR_SURF_8_BANK));
2507                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510                                 NUM_BANKS(ADDR_SURF_8_BANK));
2511                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2514                                 NUM_BANKS(ADDR_SURF_8_BANK));
2515                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518                                 NUM_BANKS(ADDR_SURF_8_BANK));
2519                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522                                 NUM_BANKS(ADDR_SURF_8_BANK));
2523                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526                                 NUM_BANKS(ADDR_SURF_8_BANK));
2527                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530                                 NUM_BANKS(ADDR_SURF_8_BANK));
2531                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2532                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2533                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534                                 NUM_BANKS(ADDR_SURF_16_BANK));
2535                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538                                 NUM_BANKS(ADDR_SURF_16_BANK));
2539                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2540                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542                                  NUM_BANKS(ADDR_SURF_16_BANK));
2543                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546                                  NUM_BANKS(ADDR_SURF_16_BANK));
2547                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550                                  NUM_BANKS(ADDR_SURF_16_BANK));
2551                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554                                  NUM_BANKS(ADDR_SURF_16_BANK));
2555                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2558                                  NUM_BANKS(ADDR_SURF_8_BANK));
2559
2560                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2561                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2562                             reg_offset != 23)
2563                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2564
2565                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2566                         if (reg_offset != 7)
2567                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2568
2569                 break;
2570         }
2571 }
2572
2573 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2574 {
2575         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2576
2577         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2578                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2579                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2580         } else if (se_num == 0xffffffff) {
2581                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2582                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2583         } else if (sh_num == 0xffffffff) {
2584                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2585                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2586         } else {
2587                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2588                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2589         }
2590         WREG32(mmGRBM_GFX_INDEX, data);
2591 }
2592
2593 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2594 {
2595         return (u32)((1ULL << bit_width) - 1);
2596 }
2597
2598 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2599 {
2600         u32 data, mask;
2601
2602         data = RREG32(mmCC_RB_BACKEND_DISABLE);
2603         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2604
2605         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2606         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2607
2608         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2609                                        adev->gfx.config.max_sh_per_se);
2610
2611         return (~data) & mask;
2612 }
2613
2614 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2615 {
2616         int i, j;
2617         u32 data;
2618         u32 active_rbs = 0;
2619         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2620                                         adev->gfx.config.max_sh_per_se;
2621
2622         mutex_lock(&adev->grbm_idx_mutex);
2623         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2624                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2625                         gfx_v8_0_select_se_sh(adev, i, j);
2626                         data = gfx_v8_0_get_rb_active_bitmap(adev);
2627                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2628                                                rb_bitmap_width_per_sh);
2629                 }
2630         }
2631         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2632         mutex_unlock(&adev->grbm_idx_mutex);
2633
2634         adev->gfx.config.backend_enable_mask = active_rbs;
2635         adev->gfx.config.num_rbs = hweight32(active_rbs);
2636 }
2637
2638 /**
2639  * gfx_v8_0_init_compute_vmid - gart enable
2640  *
2641  * @rdev: amdgpu_device pointer
2642  *
2643  * Initialize compute vmid sh_mem registers
2644  *
2645  */
2646 #define DEFAULT_SH_MEM_BASES    (0x6000)
2647 #define FIRST_COMPUTE_VMID      (8)
2648 #define LAST_COMPUTE_VMID       (16)
2649 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2650 {
2651         int i;
2652         uint32_t sh_mem_config;
2653         uint32_t sh_mem_bases;
2654
2655         /*
2656          * Configure apertures:
2657          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2658          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2659          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2660          */
2661         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2662
2663         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2664                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2665                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2666                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2667                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2668                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2669
2670         mutex_lock(&adev->srbm_mutex);
2671         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2672                 vi_srbm_select(adev, 0, 0, 0, i);
2673                 /* CP and shaders */
2674                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2675                 WREG32(mmSH_MEM_APE1_BASE, 1);
2676                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2677                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2678         }
2679         vi_srbm_select(adev, 0, 0, 0, 0);
2680         mutex_unlock(&adev->srbm_mutex);
2681 }
2682
2683 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2684 {
2685         u32 tmp;
2686         int i;
2687
2688         tmp = RREG32(mmGRBM_CNTL);
2689         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2690         WREG32(mmGRBM_CNTL, tmp);
2691
2692         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2693         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2694         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2695
2696         gfx_v8_0_tiling_mode_table_init(adev);
2697
2698         gfx_v8_0_setup_rb(adev);
2699
2700         /* XXX SH_MEM regs */
2701         /* where to put LDS, scratch, GPUVM in FSA64 space */
2702         mutex_lock(&adev->srbm_mutex);
2703         for (i = 0; i < 16; i++) {
2704                 vi_srbm_select(adev, 0, 0, 0, i);
2705                 /* CP and shaders */
2706                 if (i == 0) {
2707                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2708                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2709                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2710                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2711                         WREG32(mmSH_MEM_CONFIG, tmp);
2712                 } else {
2713                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2714                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2715                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2716                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2717                         WREG32(mmSH_MEM_CONFIG, tmp);
2718                 }
2719
2720                 WREG32(mmSH_MEM_APE1_BASE, 1);
2721                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2722                 WREG32(mmSH_MEM_BASES, 0);
2723         }
2724         vi_srbm_select(adev, 0, 0, 0, 0);
2725         mutex_unlock(&adev->srbm_mutex);
2726
2727         gfx_v8_0_init_compute_vmid(adev);
2728
2729         mutex_lock(&adev->grbm_idx_mutex);
2730         /*
2731          * making sure that the following register writes will be broadcasted
2732          * to all the shaders
2733          */
2734         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2735
2736         WREG32(mmPA_SC_FIFO_SIZE,
2737                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
2738                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2739                    (adev->gfx.config.sc_prim_fifo_size_backend <<
2740                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2741                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
2742                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2743                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2744                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2745         mutex_unlock(&adev->grbm_idx_mutex);
2746
2747 }
2748
2749 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2750 {
2751         u32 i, j, k;
2752         u32 mask;
2753
2754         mutex_lock(&adev->grbm_idx_mutex);
2755         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2756                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2757                         gfx_v8_0_select_se_sh(adev, i, j);
2758                         for (k = 0; k < adev->usec_timeout; k++) {
2759                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2760                                         break;
2761                                 udelay(1);
2762                         }
2763                 }
2764         }
2765         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2766         mutex_unlock(&adev->grbm_idx_mutex);
2767
2768         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2769                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2770                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2771                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2772         for (k = 0; k < adev->usec_timeout; k++) {
2773                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2774                         break;
2775                 udelay(1);
2776         }
2777 }
2778
2779 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2780                                                bool enable)
2781 {
2782         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2783
2784         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2785         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2786         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2787         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2788
2789         WREG32(mmCP_INT_CNTL_RING0, tmp);
2790 }
2791
2792 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2793 {
2794         u32 tmp = RREG32(mmRLC_CNTL);
2795
2796         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2797         WREG32(mmRLC_CNTL, tmp);
2798
2799         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2800
2801         gfx_v8_0_wait_for_rlc_serdes(adev);
2802 }
2803
2804 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2805 {
2806         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2807
2808         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2809         WREG32(mmGRBM_SOFT_RESET, tmp);
2810         udelay(50);
2811         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2812         WREG32(mmGRBM_SOFT_RESET, tmp);
2813         udelay(50);
2814 }
2815
2816 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2817 {
2818         u32 tmp = RREG32(mmRLC_CNTL);
2819
2820         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2821         WREG32(mmRLC_CNTL, tmp);
2822
2823         /* carrizo do enable cp interrupt after cp inited */
2824         if (!(adev->flags & AMD_IS_APU))
2825                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2826
2827         udelay(50);
2828 }
2829
2830 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2831 {
2832         const struct rlc_firmware_header_v2_0 *hdr;
2833         const __le32 *fw_data;
2834         unsigned i, fw_size;
2835
2836         if (!adev->gfx.rlc_fw)
2837                 return -EINVAL;
2838
2839         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2840         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2841
2842         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2843                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2844         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2845
2846         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2847         for (i = 0; i < fw_size; i++)
2848                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2849         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2850
2851         return 0;
2852 }
2853
2854 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2855 {
2856         int r;
2857
2858         gfx_v8_0_rlc_stop(adev);
2859
2860         /* disable CG */
2861         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2862
2863         /* disable PG */
2864         WREG32(mmRLC_PG_CNTL, 0);
2865
2866         gfx_v8_0_rlc_reset(adev);
2867
2868         if (!adev->pp_enabled) {
2869                 if (!adev->firmware.smu_load) {
2870                         /* legacy rlc firmware loading */
2871                         r = gfx_v8_0_rlc_load_microcode(adev);
2872                         if (r)
2873                                 return r;
2874                 } else {
2875                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2876                                                         AMDGPU_UCODE_ID_RLC_G);
2877                         if (r)
2878                                 return -EINVAL;
2879                 }
2880         }
2881
2882         gfx_v8_0_rlc_start(adev);
2883
2884         return 0;
2885 }
2886
2887 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2888 {
2889         int i;
2890         u32 tmp = RREG32(mmCP_ME_CNTL);
2891
2892         if (enable) {
2893                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2894                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2895                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2896         } else {
2897                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2898                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2899                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2900                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2901                         adev->gfx.gfx_ring[i].ready = false;
2902         }
2903         WREG32(mmCP_ME_CNTL, tmp);
2904         udelay(50);
2905 }
2906
2907 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2908 {
2909         const struct gfx_firmware_header_v1_0 *pfp_hdr;
2910         const struct gfx_firmware_header_v1_0 *ce_hdr;
2911         const struct gfx_firmware_header_v1_0 *me_hdr;
2912         const __le32 *fw_data;
2913         unsigned i, fw_size;
2914
2915         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2916                 return -EINVAL;
2917
2918         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2919                 adev->gfx.pfp_fw->data;
2920         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2921                 adev->gfx.ce_fw->data;
2922         me_hdr = (const struct gfx_firmware_header_v1_0 *)
2923                 adev->gfx.me_fw->data;
2924
2925         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2926         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2927         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2928
2929         gfx_v8_0_cp_gfx_enable(adev, false);
2930
2931         /* PFP */
2932         fw_data = (const __le32 *)
2933                 (adev->gfx.pfp_fw->data +
2934                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2935         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2936         WREG32(mmCP_PFP_UCODE_ADDR, 0);
2937         for (i = 0; i < fw_size; i++)
2938                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2939         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2940
2941         /* CE */
2942         fw_data = (const __le32 *)
2943                 (adev->gfx.ce_fw->data +
2944                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2945         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2946         WREG32(mmCP_CE_UCODE_ADDR, 0);
2947         for (i = 0; i < fw_size; i++)
2948                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2949         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2950
2951         /* ME */
2952         fw_data = (const __le32 *)
2953                 (adev->gfx.me_fw->data +
2954                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2955         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2956         WREG32(mmCP_ME_RAM_WADDR, 0);
2957         for (i = 0; i < fw_size; i++)
2958                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2959         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2960
2961         return 0;
2962 }
2963
2964 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2965 {
2966         u32 count = 0;
2967         const struct cs_section_def *sect = NULL;
2968         const struct cs_extent_def *ext = NULL;
2969
2970         /* begin clear state */
2971         count += 2;
2972         /* context control state */
2973         count += 3;
2974
2975         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2976                 for (ext = sect->section; ext->extent != NULL; ++ext) {
2977                         if (sect->id == SECT_CONTEXT)
2978                                 count += 2 + ext->reg_count;
2979                         else
2980                                 return 0;
2981                 }
2982         }
2983         /* pa_sc_raster_config/pa_sc_raster_config1 */
2984         count += 4;
2985         /* end clear state */
2986         count += 2;
2987         /* clear state */
2988         count += 2;
2989
2990         return count;
2991 }
2992
2993 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2994 {
2995         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2996         const struct cs_section_def *sect = NULL;
2997         const struct cs_extent_def *ext = NULL;
2998         int r, i;
2999
3000         /* init the CP */
3001         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3002         WREG32(mmCP_ENDIAN_SWAP, 0);
3003         WREG32(mmCP_DEVICE_ID, 1);
3004
3005         gfx_v8_0_cp_gfx_enable(adev, true);
3006
3007         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3008         if (r) {
3009                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3010                 return r;
3011         }
3012
3013         /* clear state buffer */
3014         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3015         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3016
3017         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3018         amdgpu_ring_write(ring, 0x80000000);
3019         amdgpu_ring_write(ring, 0x80000000);
3020
3021         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3022                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3023                         if (sect->id == SECT_CONTEXT) {
3024                                 amdgpu_ring_write(ring,
3025                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3026                                                ext->reg_count));
3027                                 amdgpu_ring_write(ring,
3028                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3029                                 for (i = 0; i < ext->reg_count; i++)
3030                                         amdgpu_ring_write(ring, ext->extent[i]);
3031                         }
3032                 }
3033         }
3034
3035         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3036         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3037         switch (adev->asic_type) {
3038         case CHIP_TONGA:
3039                 amdgpu_ring_write(ring, 0x16000012);
3040                 amdgpu_ring_write(ring, 0x0000002A);
3041                 break;
3042         case CHIP_FIJI:
3043                 amdgpu_ring_write(ring, 0x3a00161a);
3044                 amdgpu_ring_write(ring, 0x0000002e);
3045                 break;
3046         case CHIP_TOPAZ:
3047         case CHIP_CARRIZO:
3048                 amdgpu_ring_write(ring, 0x00000002);
3049                 amdgpu_ring_write(ring, 0x00000000);
3050                 break;
3051         case CHIP_STONEY:
3052                 amdgpu_ring_write(ring, 0x00000000);
3053                 amdgpu_ring_write(ring, 0x00000000);
3054                 break;
3055         default:
3056                 BUG();
3057         }
3058
3059         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3060         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3061
3062         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3063         amdgpu_ring_write(ring, 0);
3064
3065         /* init the CE partitions */
3066         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3067         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3068         amdgpu_ring_write(ring, 0x8000);
3069         amdgpu_ring_write(ring, 0x8000);
3070
3071         amdgpu_ring_commit(ring);
3072
3073         return 0;
3074 }
3075
3076 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
3077 {
3078         struct amdgpu_ring *ring;
3079         u32 tmp;
3080         u32 rb_bufsz;
3081         u64 rb_addr, rptr_addr;
3082         int r;
3083
3084         /* Set the write pointer delay */
3085         WREG32(mmCP_RB_WPTR_DELAY, 0);
3086
3087         /* set the RB to use vmid 0 */
3088         WREG32(mmCP_RB_VMID, 0);
3089
3090         /* Set ring buffer size */
3091         ring = &adev->gfx.gfx_ring[0];
3092         rb_bufsz = order_base_2(ring->ring_size / 8);
3093         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3094         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3095         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
3096         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
3097 #ifdef __BIG_ENDIAN
3098         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3099 #endif
3100         WREG32(mmCP_RB0_CNTL, tmp);
3101
3102         /* Initialize the ring buffer's read and write pointers */
3103         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
3104         ring->wptr = 0;
3105         WREG32(mmCP_RB0_WPTR, ring->wptr);
3106
3107         /* set the wb address wether it's enabled or not */
3108         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3109         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3110         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
3111
3112         mdelay(1);
3113         WREG32(mmCP_RB0_CNTL, tmp);
3114
3115         rb_addr = ring->gpu_addr >> 8;
3116         WREG32(mmCP_RB0_BASE, rb_addr);
3117         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3118
3119         /* no gfx doorbells on iceland */
3120         if (adev->asic_type != CHIP_TOPAZ) {
3121                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
3122                 if (ring->use_doorbell) {
3123                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3124                                             DOORBELL_OFFSET, ring->doorbell_index);
3125                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3126                                             DOORBELL_EN, 1);
3127                 } else {
3128                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3129                                             DOORBELL_EN, 0);
3130                 }
3131                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
3132
3133                 if (adev->asic_type == CHIP_TONGA) {
3134                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3135                                             DOORBELL_RANGE_LOWER,
3136                                             AMDGPU_DOORBELL_GFX_RING0);
3137                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3138
3139                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
3140                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3141                 }
3142
3143         }
3144
3145         /* start the ring */
3146         gfx_v8_0_cp_gfx_start(adev);
3147         ring->ready = true;
3148         r = amdgpu_ring_test_ring(ring);
3149         if (r) {
3150                 ring->ready = false;
3151                 return r;
3152         }
3153
3154         return 0;
3155 }
3156
3157 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3158 {
3159         int i;
3160
3161         if (enable) {
3162                 WREG32(mmCP_MEC_CNTL, 0);
3163         } else {
3164                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3165                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3166                         adev->gfx.compute_ring[i].ready = false;
3167         }
3168         udelay(50);
3169 }
3170
3171 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3172 {
3173         const struct gfx_firmware_header_v1_0 *mec_hdr;
3174         const __le32 *fw_data;
3175         unsigned i, fw_size;
3176
3177         if (!adev->gfx.mec_fw)
3178                 return -EINVAL;
3179
3180         gfx_v8_0_cp_compute_enable(adev, false);
3181
3182         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3183         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3184
3185         fw_data = (const __le32 *)
3186                 (adev->gfx.mec_fw->data +
3187                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3188         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3189
3190         /* MEC1 */
3191         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3192         for (i = 0; i < fw_size; i++)
3193                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3194         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3195
3196         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3197         if (adev->gfx.mec2_fw) {
3198                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3199
3200                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3201                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3202
3203                 fw_data = (const __le32 *)
3204                         (adev->gfx.mec2_fw->data +
3205                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3206                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3207
3208                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3209                 for (i = 0; i < fw_size; i++)
3210                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3211                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3212         }
3213
3214         return 0;
3215 }
3216
/*
 * struct vi_mqd - layout of the per-ring MQD (memory queue descriptor)
 * buffer used for the VI compute queues.
 *
 * gfx_v8_0_cp_compute_resume() allocates one buffer object of
 * sizeof(struct vi_mqd) per compute ring, zeroes it and fills it in.
 * Every member is a single 32-bit dword; the trailing "ordinalN"
 * comment gives the member's dword index within the descriptor, so
 * members must not be reordered, added or removed.
 *
 * Some cp_hqd_* members are filled from the like-named mmCP_HQD_*
 * registers in gfx_v8_0_cp_compute_resume() (e.g. the EOP base address
 * and the PQ doorbell control).
 */
3217 struct vi_mqd {
3218         uint32_t header;  /* ordinal0 */
3219         uint32_t compute_dispatch_initiator;  /* ordinal1 */
3220         uint32_t compute_dim_x;  /* ordinal2 */
3221         uint32_t compute_dim_y;  /* ordinal3 */
3222         uint32_t compute_dim_z;  /* ordinal4 */
3223         uint32_t compute_start_x;  /* ordinal5 */
3224         uint32_t compute_start_y;  /* ordinal6 */
3225         uint32_t compute_start_z;  /* ordinal7 */
3226         uint32_t compute_num_thread_x;  /* ordinal8 */
3227         uint32_t compute_num_thread_y;  /* ordinal9 */
3228         uint32_t compute_num_thread_z;  /* ordinal10 */
3229         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
3230         uint32_t compute_perfcount_enable;  /* ordinal12 */
3231         uint32_t compute_pgm_lo;  /* ordinal13 */
3232         uint32_t compute_pgm_hi;  /* ordinal14 */
3233         uint32_t compute_tba_lo;  /* ordinal15 */
3234         uint32_t compute_tba_hi;  /* ordinal16 */
3235         uint32_t compute_tma_lo;  /* ordinal17 */
3236         uint32_t compute_tma_hi;  /* ordinal18 */
3237         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
3238         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
3239         uint32_t compute_vmid;  /* ordinal21 */
3240         uint32_t compute_resource_limits;  /* ordinal22 */
3241         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
3242         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
3243         uint32_t compute_tmpring_size;  /* ordinal25 */
3244         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
3245         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
3246         uint32_t compute_restart_x;  /* ordinal28 */
3247         uint32_t compute_restart_y;  /* ordinal29 */
3248         uint32_t compute_restart_z;  /* ordinal30 */
3249         uint32_t compute_thread_trace_enable;  /* ordinal31 */
3250         uint32_t compute_misc_reserved;  /* ordinal32 */
3251         uint32_t compute_dispatch_id;  /* ordinal33 */
3252         uint32_t compute_threadgroup_id;  /* ordinal34 */
3253         uint32_t compute_relaunch;  /* ordinal35 */
3254         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
3255         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
3256         uint32_t compute_wave_restore_control;  /* ordinal38 */
3257         uint32_t reserved9;  /* ordinal39 */
3258         uint32_t reserved10;  /* ordinal40 */
3259         uint32_t reserved11;  /* ordinal41 */
3260         uint32_t reserved12;  /* ordinal42 */
3261         uint32_t reserved13;  /* ordinal43 */
3262         uint32_t reserved14;  /* ordinal44 */
3263         uint32_t reserved15;  /* ordinal45 */
3264         uint32_t reserved16;  /* ordinal46 */
3265         uint32_t reserved17;  /* ordinal47 */
3266         uint32_t reserved18;  /* ordinal48 */
3267         uint32_t reserved19;  /* ordinal49 */
3268         uint32_t reserved20;  /* ordinal50 */
3269         uint32_t reserved21;  /* ordinal51 */
3270         uint32_t reserved22;  /* ordinal52 */
3271         uint32_t reserved23;  /* ordinal53 */
3272         uint32_t reserved24;  /* ordinal54 */
3273         uint32_t reserved25;  /* ordinal55 */
3274         uint32_t reserved26;  /* ordinal56 */
3275         uint32_t reserved27;  /* ordinal57 */
3276         uint32_t reserved28;  /* ordinal58 */
3277         uint32_t reserved29;  /* ordinal59 */
3278         uint32_t reserved30;  /* ordinal60 */
3279         uint32_t reserved31;  /* ordinal61 */
3280         uint32_t reserved32;  /* ordinal62 */
3281         uint32_t reserved33;  /* ordinal63 */
3282         uint32_t reserved34;  /* ordinal64 */
3283         uint32_t compute_user_data_0;  /* ordinal65 */
3284         uint32_t compute_user_data_1;  /* ordinal66 */
3285         uint32_t compute_user_data_2;  /* ordinal67 */
3286         uint32_t compute_user_data_3;  /* ordinal68 */
3287         uint32_t compute_user_data_4;  /* ordinal69 */
3288         uint32_t compute_user_data_5;  /* ordinal70 */
3289         uint32_t compute_user_data_6;  /* ordinal71 */
3290         uint32_t compute_user_data_7;  /* ordinal72 */
3291         uint32_t compute_user_data_8;  /* ordinal73 */
3292         uint32_t compute_user_data_9;  /* ordinal74 */
3293         uint32_t compute_user_data_10;  /* ordinal75 */
3294         uint32_t compute_user_data_11;  /* ordinal76 */
3295         uint32_t compute_user_data_12;  /* ordinal77 */
3296         uint32_t compute_user_data_13;  /* ordinal78 */
3297         uint32_t compute_user_data_14;  /* ordinal79 */
3298         uint32_t compute_user_data_15;  /* ordinal80 */
3299         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
3300         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
3301         uint32_t reserved35;  /* ordinal83 */
3302         uint32_t reserved36;  /* ordinal84 */
3303         uint32_t reserved37;  /* ordinal85 */
3304         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
3305         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
3306         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
3307         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
3308         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
3309         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
3310         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
3311         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
3312         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
3313         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
3314         uint32_t reserved38;  /* ordinal96 */
3315         uint32_t reserved39;  /* ordinal97 */
3316         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
3317         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
3318         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
3319         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
3320         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
3321         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
3322         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
3323         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
3324         uint32_t reserved40;  /* ordinal106 */
3325         uint32_t reserved41;  /* ordinal107 */
3326         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
3327         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
3328         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
3329         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
3330         uint32_t reserved42;  /* ordinal112 */
3331         uint32_t reserved43;  /* ordinal113 */
3332         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
3333         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
3334         uint32_t cp_packet_id_lo;  /* ordinal116 */
3335         uint32_t cp_packet_id_hi;  /* ordinal117 */
3336         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
3337         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
3338         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
3339         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
3340         uint32_t gds_save_mask_lo;  /* ordinal122 */
3341         uint32_t gds_save_mask_hi;  /* ordinal123 */
3342         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
3343         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
3344         uint32_t reserved44;  /* ordinal126 */
3345         uint32_t reserved45;  /* ordinal127 */
3346         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
3347         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
3348         uint32_t cp_hqd_active;  /* ordinal130 */
3349         uint32_t cp_hqd_vmid;  /* ordinal131 */
3350         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
3351         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
3352         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
3353         uint32_t cp_hqd_quantum;  /* ordinal135 */
3354         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
3355         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
3356         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
3357         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
3358         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
3359         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
3360         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
3361         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
3362         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
3363         uint32_t cp_hqd_pq_control;  /* ordinal145 */
3364         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
3365         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
3366         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
3367         uint32_t cp_hqd_ib_control;  /* ordinal149 */
3368         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
3369         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
3370         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
3371         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
3372         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
3373         uint32_t cp_hqd_msg_type;  /* ordinal155 */
3374         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
3375         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
3376         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
3377         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
3378         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
3379         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
3380         uint32_t cp_mqd_control;  /* ordinal162 */
3381         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
3382         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
3383         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
3384         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
3385         uint32_t cp_hqd_eop_control;  /* ordinal167 */
3386         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
3387         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
3388         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
3389         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
3390         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
3391         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
3392         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
3393         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
3394         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
3395         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
3396         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
3397         uint32_t cp_hqd_error;  /* ordinal179 */
3398         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
3399         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
3400         uint32_t reserved46;  /* ordinal182 */
3401         uint32_t reserved47;  /* ordinal183 */
3402         uint32_t reserved48;  /* ordinal184 */
3403         uint32_t reserved49;  /* ordinal185 */
3404         uint32_t reserved50;  /* ordinal186 */
3405         uint32_t reserved51;  /* ordinal187 */
3406         uint32_t reserved52;  /* ordinal188 */
3407         uint32_t reserved53;  /* ordinal189 */
3408         uint32_t reserved54;  /* ordinal190 */
3409         uint32_t reserved55;  /* ordinal191 */
3410         uint32_t iqtimer_pkt_header;  /* ordinal192 */
3411         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
3412         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
3413         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
3414         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
3415         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
3416         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
3417         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
3418         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
3419         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
3420         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
3421         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
3422         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
3423         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
3424         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
3425         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
3426         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
3427         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
3428         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
3429         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
3430         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
3431         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
3432         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
3433         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
3434         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
3435         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
3436         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
3437         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
3438         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
3439         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
3440         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
3441         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
3442         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
3443         uint32_t reserved56;  /* ordinal225 */
3444         uint32_t reserved57;  /* ordinal226 */
3445         uint32_t reserved58;  /* ordinal227 */
3446         uint32_t set_resources_header;  /* ordinal228 */
3447         uint32_t set_resources_dw1;  /* ordinal229 */
3448         uint32_t set_resources_dw2;  /* ordinal230 */
3449         uint32_t set_resources_dw3;  /* ordinal231 */
3450         uint32_t set_resources_dw4;  /* ordinal232 */
3451         uint32_t set_resources_dw5;  /* ordinal233 */
3452         uint32_t set_resources_dw6;  /* ordinal234 */
3453         uint32_t set_resources_dw7;  /* ordinal235 */
3454         uint32_t reserved59;  /* ordinal236 */
3455         uint32_t reserved60;  /* ordinal237 */
3456         uint32_t reserved61;  /* ordinal238 */
3457         uint32_t reserved62;  /* ordinal239 */
3458         uint32_t reserved63;  /* ordinal240 */
3459         uint32_t reserved64;  /* ordinal241 */
3460         uint32_t reserved65;  /* ordinal242 */
3461         uint32_t reserved66;  /* ordinal243 */
3462         uint32_t reserved67;  /* ordinal244 */
3463         uint32_t reserved68;  /* ordinal245 */
3464         uint32_t reserved69;  /* ordinal246 */
3465         uint32_t reserved70;  /* ordinal247 */
3466         uint32_t reserved71;  /* ordinal248 */
3467         uint32_t reserved72;  /* ordinal249 */
3468         uint32_t reserved73;  /* ordinal250 */
3469         uint32_t reserved74;  /* ordinal251 */
3470         uint32_t reserved75;  /* ordinal252 */
3471         uint32_t reserved76;  /* ordinal253 */
3472         uint32_t reserved77;  /* ordinal254 */
3473         uint32_t reserved78;  /* ordinal255 */
3474
3475         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3476 };
3477
3478 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3479 {
3480         int i, r;
3481
3482         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3483                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3484
3485                 if (ring->mqd_obj) {
3486                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3487                         if (unlikely(r != 0))
3488                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3489
3490                         amdgpu_bo_unpin(ring->mqd_obj);
3491                         amdgpu_bo_unreserve(ring->mqd_obj);
3492
3493                         amdgpu_bo_unref(&ring->mqd_obj);
3494                         ring->mqd_obj = NULL;
3495                 }
3496         }
3497 }
3498
3499 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
3500 {
3501         int r, i, j;
3502         u32 tmp;
3503         bool use_doorbell = true;
3504         u64 hqd_gpu_addr;
3505         u64 mqd_gpu_addr;
3506         u64 eop_gpu_addr;
3507         u64 wb_gpu_addr;
3508         u32 *buf;
3509         struct vi_mqd *mqd;
3510
3511         /* init the pipes */
3512         mutex_lock(&adev->srbm_mutex);
3513         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
3514                 int me = (i < 4) ? 1 : 2;
3515                 int pipe = (i < 4) ? i : (i - 4);
3516
3517                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
3518                 eop_gpu_addr >>= 8;
3519
3520                 vi_srbm_select(adev, me, pipe, 0, 0);
3521
3522                 /* write the EOP addr */
3523                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
3524                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
3525
3526                 /* set the VMID assigned */
3527                 WREG32(mmCP_HQD_VMID, 0);
3528
3529                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3530                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
3531                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3532                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
3533                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
3534         }
3535         vi_srbm_select(adev, 0, 0, 0, 0);
3536         mutex_unlock(&adev->srbm_mutex);
3537
3538         /* init the queues.  Just two for now. */
3539         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3540                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3541
3542                 if (ring->mqd_obj == NULL) {
3543                         r = amdgpu_bo_create(adev,
3544                                              sizeof(struct vi_mqd),
3545                                              PAGE_SIZE, true,
3546                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3547                                              NULL, &ring->mqd_obj);
3548                         if (r) {
3549                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3550                                 return r;
3551                         }
3552                 }
3553
3554                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3555                 if (unlikely(r != 0)) {
3556                         gfx_v8_0_cp_compute_fini(adev);
3557                         return r;
3558                 }
3559                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3560                                   &mqd_gpu_addr);
3561                 if (r) {
3562                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3563                         gfx_v8_0_cp_compute_fini(adev);
3564                         return r;
3565                 }
3566                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3567                 if (r) {
3568                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3569                         gfx_v8_0_cp_compute_fini(adev);
3570                         return r;
3571                 }
3572
3573                 /* init the mqd struct */
3574                 memset(buf, 0, sizeof(struct vi_mqd));
3575
3576                 mqd = (struct vi_mqd *)buf;
3577                 mqd->header = 0xC0310800;
3578                 mqd->compute_pipelinestat_enable = 0x00000001;
3579                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3580                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3581                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3582                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3583                 mqd->compute_misc_reserved = 0x00000003;
3584
3585                 mutex_lock(&adev->srbm_mutex);
3586                 vi_srbm_select(adev, ring->me,
3587                                ring->pipe,
3588                                ring->queue, 0);
3589
3590                 /* disable wptr polling */
3591                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
3592                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3593                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
3594
3595                 mqd->cp_hqd_eop_base_addr_lo =
3596                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
3597                 mqd->cp_hqd_eop_base_addr_hi =
3598                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
3599
3600                 /* enable doorbell? */
3601                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3602                 if (use_doorbell) {
3603                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3604                 } else {
3605                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
3606                 }
3607                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
3608                 mqd->cp_hqd_pq_doorbell_control = tmp;
3609
3610                 /* disable the queue if it's active */
3611                 mqd->cp_hqd_dequeue_request = 0;
3612                 mqd->cp_hqd_pq_rptr = 0;
3613                 mqd->cp_hqd_pq_wptr= 0;
3614                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
3615                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
3616                         for (j = 0; j < adev->usec_timeout; j++) {
3617                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
3618                                         break;
3619                                 udelay(1);
3620                         }
3621                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
3622                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
3623                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3624                 }
3625
3626                 /* set the pointer to the MQD */
3627                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
3628                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3629                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
3630                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
3631
3632                 /* set MQD vmid to 0 */
3633                 tmp = RREG32(mmCP_MQD_CONTROL);
3634                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3635                 WREG32(mmCP_MQD_CONTROL, tmp);
3636                 mqd->cp_mqd_control = tmp;
3637
3638                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3639                 hqd_gpu_addr = ring->gpu_addr >> 8;
3640                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3641                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3642                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
3643                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
3644
3645                 /* set up the HQD, this is similar to CP_RB0_CNTL */
3646                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
3647                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3648                                     (order_base_2(ring->ring_size / 4) - 1));
3649                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3650                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3651 #ifdef __BIG_ENDIAN
3652                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3653 #endif
3654                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3655                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3656                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3657                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3658                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3659                 mqd->cp_hqd_pq_control = tmp;
3660
3661                 /* set the wb address wether it's enabled or not */
3662                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3663                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3664                 mqd->cp_hqd_pq_rptr_report_addr_hi =
3665                         upper_32_bits(wb_gpu_addr) & 0xffff;
3666                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3667                        mqd->cp_hqd_pq_rptr_report_addr_lo);
3668                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3669                        mqd->cp_hqd_pq_rptr_report_addr_hi);
3670
3671                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3672                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3673                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3674                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3675                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3676                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3677                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
3678
3679                 /* enable the doorbell if requested */
3680                 if (use_doorbell) {
3681                         if ((adev->asic_type == CHIP_CARRIZO) ||
3682                             (adev->asic_type == CHIP_FIJI) ||
3683                             (adev->asic_type == CHIP_STONEY)) {
3684                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3685                                        AMDGPU_DOORBELL_KIQ << 2);
3686                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3687                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
3688                         }
3689                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3690                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3691                                             DOORBELL_OFFSET, ring->doorbell_index);
3692                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3693                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3694                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3695                         mqd->cp_hqd_pq_doorbell_control = tmp;
3696
3697                 } else {
3698                         mqd->cp_hqd_pq_doorbell_control = 0;
3699                 }
3700                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3701                        mqd->cp_hqd_pq_doorbell_control);
3702
3703                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3704                 ring->wptr = 0;
3705                 mqd->cp_hqd_pq_wptr = ring->wptr;
3706                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
3707                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
3708
3709                 /* set the vmid for the queue */
3710                 mqd->cp_hqd_vmid = 0;
3711                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3712
3713                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3714                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3715                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3716                 mqd->cp_hqd_persistent_state = tmp;
3717                 if (adev->asic_type == CHIP_STONEY) {
3718                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
3719                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
3720                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
3721                 }
3722
3723                 /* activate the queue */
3724                 mqd->cp_hqd_active = 1;
3725                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3726
3727                 vi_srbm_select(adev, 0, 0, 0, 0);
3728                 mutex_unlock(&adev->srbm_mutex);
3729
3730                 amdgpu_bo_kunmap(ring->mqd_obj);
3731                 amdgpu_bo_unreserve(ring->mqd_obj);
3732         }
3733
3734         if (use_doorbell) {
3735                 tmp = RREG32(mmCP_PQ_STATUS);
3736                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3737                 WREG32(mmCP_PQ_STATUS, tmp);
3738         }
3739
3740         gfx_v8_0_cp_compute_enable(adev, true);
3741
3742         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3743                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3744
3745                 ring->ready = true;
3746                 r = amdgpu_ring_test_ring(ring);
3747                 if (r)
3748                         ring->ready = false;
3749         }
3750
3751         return 0;
3752 }
3753
3754 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3755 {
3756         int r;
3757
3758         if (!(adev->flags & AMD_IS_APU))
3759                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3760
3761         if (!adev->pp_enabled) {
3762                 if (!adev->firmware.smu_load) {
3763                         /* legacy firmware loading */
3764                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
3765                         if (r)
3766                                 return r;
3767
3768                         r = gfx_v8_0_cp_compute_load_microcode(adev);
3769                         if (r)
3770                                 return r;
3771                 } else {
3772                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3773                                                         AMDGPU_UCODE_ID_CP_CE);
3774                         if (r)
3775                                 return -EINVAL;
3776
3777                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3778                                                         AMDGPU_UCODE_ID_CP_PFP);
3779                         if (r)
3780                                 return -EINVAL;
3781
3782                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3783                                                         AMDGPU_UCODE_ID_CP_ME);
3784                         if (r)
3785                                 return -EINVAL;
3786
3787                         if (adev->asic_type == CHIP_TOPAZ) {
3788                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
3789                                 if (r)
3790                                         return r;
3791                         } else {
3792                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3793                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
3794                                 if (r)
3795                                         return -EINVAL;
3796                         }
3797                 }
3798         }
3799
3800         r = gfx_v8_0_cp_gfx_resume(adev);
3801         if (r)
3802                 return r;
3803
3804         r = gfx_v8_0_cp_compute_resume(adev);
3805         if (r)
3806                 return r;
3807
3808         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3809
3810         return 0;
3811 }
3812
3813 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3814 {
3815         gfx_v8_0_cp_gfx_enable(adev, enable);
3816         gfx_v8_0_cp_compute_enable(adev, enable);
3817 }
3818
/**
 * gfx_v8_0_hw_init - bring up the GFX block hardware
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Programs the golden registers, runs the GPU init, then resumes the RLC
 * followed by the CP.
 * Returns 0 on success or the error from the RLC/CP resume step.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	/* the CP resume result is the result of the whole init */
	return gfx_v8_0_cp_resume(adev);
}
3838
3839 static int gfx_v8_0_hw_fini(void *handle)
3840 {
3841         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3842
3843         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3844         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3845         gfx_v8_0_cp_enable(adev, false);
3846         gfx_v8_0_rlc_stop(adev);
3847         gfx_v8_0_cp_compute_fini(adev);
3848
3849         return 0;
3850 }
3851
/**
 * gfx_v8_0_suspend - suspend callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Suspend is implemented as a plain hardware teardown.
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(void *handle)
{
	/* the handle is forwarded as-is; hw_fini does the cast itself */
	return gfx_v8_0_hw_fini(handle);
}
3858
/**
 * gfx_v8_0_resume - resume callback for the GFX block
 *
 * @handle: amdgpu_device pointer passed as an opaque handle
 *
 * Resume is implemented as a full hardware init.
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(void *handle)
{
	/* the handle is forwarded as-is; hw_init does the cast itself */
	return gfx_v8_0_hw_init(handle);
}
3865
3866 static bool gfx_v8_0_is_idle(void *handle)
3867 {
3868         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3869
3870         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3871                 return false;
3872         else
3873                 return true;
3874 }
3875
3876 static int gfx_v8_0_wait_for_idle(void *handle)
3877 {
3878         unsigned i;
3879         u32 tmp;
3880         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881
3882         for (i = 0; i < adev->usec_timeout; i++) {
3883                 /* read MC_STATUS */
3884                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3885
3886                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3887                         return 0;
3888                 udelay(1);
3889         }
3890         return -ETIMEDOUT;
3891 }
3892
3893 static void gfx_v8_0_print_status(void *handle)
3894 {
3895         int i;
3896         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3897
3898         dev_info(adev->dev, "GFX 8.x registers\n");
3899         dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
3900                  RREG32(mmGRBM_STATUS));
3901         dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
3902                  RREG32(mmGRBM_STATUS2));
3903         dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3904                  RREG32(mmGRBM_STATUS_SE0));
3905         dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3906                  RREG32(mmGRBM_STATUS_SE1));
3907         dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3908                  RREG32(mmGRBM_STATUS_SE2));
3909         dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3910                  RREG32(mmGRBM_STATUS_SE3));
3911         dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3912         dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3913                  RREG32(mmCP_STALLED_STAT1));
3914         dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3915                  RREG32(mmCP_STALLED_STAT2));
3916         dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3917                  RREG32(mmCP_STALLED_STAT3));
3918         dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3919                  RREG32(mmCP_CPF_BUSY_STAT));
3920         dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3921                  RREG32(mmCP_CPF_STALLED_STAT1));
3922         dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3923         dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3924         dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3925                  RREG32(mmCP_CPC_STALLED_STAT1));
3926         dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3927
3928         for (i = 0; i < 32; i++) {
3929                 dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
3930                          i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3931         }
3932         for (i = 0; i < 16; i++) {
3933                 dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
3934                          i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3935         }
3936         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3937                 dev_info(adev->dev, "  se: %d\n", i);
3938                 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3939                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
3940                          RREG32(mmPA_SC_RASTER_CONFIG));
3941                 dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
3942                          RREG32(mmPA_SC_RASTER_CONFIG_1));
3943         }
3944         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3945
3946         dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
3947                  RREG32(mmGB_ADDR_CONFIG));
3948         dev_info(adev->dev, "  HDP_ADDR_CONFIG=0x%08X\n",
3949                  RREG32(mmHDP_ADDR_CONFIG));
3950         dev_info(adev->dev, "  DMIF_ADDR_CALC=0x%08X\n",
3951                  RREG32(mmDMIF_ADDR_CALC));
3952
3953         dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
3954                  RREG32(mmCP_MEQ_THRESHOLDS));
3955         dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
3956                  RREG32(mmSX_DEBUG_1));
3957         dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
3958                  RREG32(mmTA_CNTL_AUX));
3959         dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
3960                  RREG32(mmSPI_CONFIG_CNTL));
3961         dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
3962                  RREG32(mmSQ_CONFIG));
3963         dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
3964                  RREG32(mmDB_DEBUG));
3965         dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
3966                  RREG32(mmDB_DEBUG2));
3967         dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
3968                  RREG32(mmDB_DEBUG3));
3969         dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
3970                  RREG32(mmCB_HW_CONTROL));
3971         dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
3972                  RREG32(mmSPI_CONFIG_CNTL_1));
3973         dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
3974                  RREG32(mmPA_SC_FIFO_SIZE));
3975         dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
3976                  RREG32(mmVGT_NUM_INSTANCES));
3977         dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
3978                  RREG32(mmCP_PERFMON_CNTL));
3979         dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3980                  RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3981         dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
3982                  RREG32(mmVGT_CACHE_INVALIDATION));
3983         dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
3984                  RREG32(mmVGT_GS_VERTEX_REUSE));
3985         dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3986                  RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3987         dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
3988                  RREG32(mmPA_CL_ENHANCE));
3989         dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
3990                  RREG32(mmPA_SC_ENHANCE));
3991
3992         dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
3993                  RREG32(mmCP_ME_CNTL));
3994         dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
3995                  RREG32(mmCP_MAX_CONTEXT));
3996         dev_info(adev->dev, "  CP_ENDIAN_SWAP=0x%08X\n",
3997                  RREG32(mmCP_ENDIAN_SWAP));
3998         dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
3999                  RREG32(mmCP_DEVICE_ID));
4000
4001         dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
4002                  RREG32(mmCP_SEM_WAIT_TIMER));
4003
4004         dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
4005                  RREG32(mmCP_RB_WPTR_DELAY));
4006         dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
4007                  RREG32(mmCP_RB_VMID));
4008         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4009                  RREG32(mmCP_RB0_CNTL));
4010         dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
4011                  RREG32(mmCP_RB0_WPTR));
4012         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
4013                  RREG32(mmCP_RB0_RPTR_ADDR));
4014         dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4015                  RREG32(mmCP_RB0_RPTR_ADDR_HI));
4016         dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
4017                  RREG32(mmCP_RB0_CNTL));
4018         dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
4019                  RREG32(mmCP_RB0_BASE));
4020         dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
4021                  RREG32(mmCP_RB0_BASE_HI));
4022         dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
4023                  RREG32(mmCP_MEC_CNTL));
4024         dev_info(adev->dev, "  CP_CPF_DEBUG=0x%08X\n",
4025                  RREG32(mmCP_CPF_DEBUG));
4026
4027         dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
4028                  RREG32(mmSCRATCH_ADDR));
4029         dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
4030                  RREG32(mmSCRATCH_UMSK));
4031
4032         dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
4033                  RREG32(mmCP_INT_CNTL_RING0));
4034         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4035                  RREG32(mmRLC_LB_CNTL));
4036         dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
4037                  RREG32(mmRLC_CNTL));
4038         dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
4039                  RREG32(mmRLC_CGCG_CGLS_CTRL));
4040         dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
4041                  RREG32(mmRLC_LB_CNTR_INIT));
4042         dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
4043                  RREG32(mmRLC_LB_CNTR_MAX));
4044         dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
4045                  RREG32(mmRLC_LB_INIT_CU_MASK));
4046         dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
4047                  RREG32(mmRLC_LB_PARAMS));
4048         dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
4049                  RREG32(mmRLC_LB_CNTL));
4050         dev_info(adev->dev, "  RLC_MC_CNTL=0x%08X\n",
4051                  RREG32(mmRLC_MC_CNTL));
4052         dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
4053                  RREG32(mmRLC_UCODE_CNTL));
4054
4055         mutex_lock(&adev->srbm_mutex);
4056         for (i = 0; i < 16; i++) {
4057                 vi_srbm_select(adev, 0, 0, 0, i);
4058                 dev_info(adev->dev, "  VM %d:\n", i);
4059                 dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
4060                          RREG32(mmSH_MEM_CONFIG));
4061                 dev_info(adev->dev, "  SH_MEM_APE1_BASE=0x%08X\n",
4062                          RREG32(mmSH_MEM_APE1_BASE));
4063                 dev_info(adev->dev, "  SH_MEM_APE1_LIMIT=0x%08X\n",
4064                          RREG32(mmSH_MEM_APE1_LIMIT));
4065                 dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
4066                          RREG32(mmSH_MEM_BASES));
4067         }
4068         vi_srbm_select(adev, 0, 0, 0, 0);
4069         mutex_unlock(&adev->srbm_mutex);
4070 }
4071
4072 static int gfx_v8_0_soft_reset(void *handle)
4073 {
4074         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4075         u32 tmp;
4076         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4077
4078         /* GRBM_STATUS */
4079         tmp = RREG32(mmGRBM_STATUS);
4080         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4081                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4082                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4083                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4084                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4085                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4086                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4087                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4088                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4089                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4090         }
4091
4092         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4093                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4094                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4095                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4096                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4097         }
4098
4099         /* GRBM_STATUS2 */
4100         tmp = RREG32(mmGRBM_STATUS2);
4101         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4102                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4103                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4104
4105         /* SRBM_STATUS */
4106         tmp = RREG32(mmSRBM_STATUS);
4107         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4108                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4109                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4110
4111         if (grbm_soft_reset || srbm_soft_reset) {
4112                 gfx_v8_0_print_status((void *)adev);
4113                 /* stop the rlc */
4114                 gfx_v8_0_rlc_stop(adev);
4115
4116                 /* Disable GFX parsing/prefetching */
4117                 gfx_v8_0_cp_gfx_enable(adev, false);
4118
4119                 /* Disable MEC parsing/prefetching */
4120                 gfx_v8_0_cp_compute_enable(adev, false);
4121
4122                 if (grbm_soft_reset || srbm_soft_reset) {
4123                         tmp = RREG32(mmGMCON_DEBUG);
4124                         tmp = REG_SET_FIELD(tmp,
4125                                             GMCON_DEBUG, GFX_STALL, 1);
4126                         tmp = REG_SET_FIELD(tmp,
4127                                             GMCON_DEBUG, GFX_CLEAR, 1);
4128                         WREG32(mmGMCON_DEBUG, tmp);
4129
4130                         udelay(50);
4131                 }
4132
4133                 if (grbm_soft_reset) {
4134                         tmp = RREG32(mmGRBM_SOFT_RESET);
4135                         tmp |= grbm_soft_reset;
4136                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4137                         WREG32(mmGRBM_SOFT_RESET, tmp);
4138                         tmp = RREG32(mmGRBM_SOFT_RESET);
4139
4140                         udelay(50);
4141
4142                         tmp &= ~grbm_soft_reset;
4143                         WREG32(mmGRBM_SOFT_RESET, tmp);
4144                         tmp = RREG32(mmGRBM_SOFT_RESET);
4145                 }
4146
4147                 if (srbm_soft_reset) {
4148                         tmp = RREG32(mmSRBM_SOFT_RESET);
4149                         tmp |= srbm_soft_reset;
4150                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4151                         WREG32(mmSRBM_SOFT_RESET, tmp);
4152                         tmp = RREG32(mmSRBM_SOFT_RESET);
4153
4154                         udelay(50);
4155
4156                         tmp &= ~srbm_soft_reset;
4157                         WREG32(mmSRBM_SOFT_RESET, tmp);
4158                         tmp = RREG32(mmSRBM_SOFT_RESET);
4159                 }
4160
4161                 if (grbm_soft_reset || srbm_soft_reset) {
4162                         tmp = RREG32(mmGMCON_DEBUG);
4163                         tmp = REG_SET_FIELD(tmp,
4164                                             GMCON_DEBUG, GFX_STALL, 0);
4165                         tmp = REG_SET_FIELD(tmp,
4166                                             GMCON_DEBUG, GFX_CLEAR, 0);
4167                         WREG32(mmGMCON_DEBUG, tmp);
4168                 }
4169
4170                 /* Wait a little for things to settle down */
4171                 udelay(50);
4172                 gfx_v8_0_print_status((void *)adev);
4173         }
4174         return 0;
4175 }
4176
4177 /**
4178  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4179  *
4180  * @adev: amdgpu_device pointer
4181  *
4182  * Fetches a GPU clock counter snapshot.
4183  * Returns the 64 bit clock counter snapshot.
4184  */
4185 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4186 {
4187         uint64_t clock;
4188
4189         mutex_lock(&adev->gfx.gpu_clock_mutex);
4190         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4191         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4192                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4193         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4194         return clock;
4195 }
4196
4197 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4198                                           uint32_t vmid,
4199                                           uint32_t gds_base, uint32_t gds_size,
4200                                           uint32_t gws_base, uint32_t gws_size,
4201                                           uint32_t oa_base, uint32_t oa_size)
4202 {
4203         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4204         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4205
4206         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4207         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4208
4209         oa_base = oa_base >> AMDGPU_OA_SHIFT;
4210         oa_size = oa_size >> AMDGPU_OA_SHIFT;
4211
4212         /* GDS Base */
4213         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4214         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4215                                 WRITE_DATA_DST_SEL(0)));
4216         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4217         amdgpu_ring_write(ring, 0);
4218         amdgpu_ring_write(ring, gds_base);
4219
4220         /* GDS Size */
4221         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4222         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4223                                 WRITE_DATA_DST_SEL(0)));
4224         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4225         amdgpu_ring_write(ring, 0);
4226         amdgpu_ring_write(ring, gds_size);
4227
4228         /* GWS */
4229         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4230         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4231                                 WRITE_DATA_DST_SEL(0)));
4232         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4233         amdgpu_ring_write(ring, 0);
4234         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4235
4236         /* OA */
4237         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4238         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4239                                 WRITE_DATA_DST_SEL(0)));
4240         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4241         amdgpu_ring_write(ring, 0);
4242         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4243 }
4244
4245 static int gfx_v8_0_early_init(void *handle)
4246 {
4247         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4248
4249         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4250         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4251         gfx_v8_0_set_ring_funcs(adev);
4252         gfx_v8_0_set_irq_funcs(adev);
4253         gfx_v8_0_set_gds_init(adev);
4254         gfx_v8_0_set_rlc_funcs(adev);
4255
4256         return 0;
4257 }
4258
4259 static int gfx_v8_0_late_init(void *handle)
4260 {
4261         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4262         int r;
4263
4264         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4265         if (r)
4266                 return r;
4267
4268         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4269         if (r)
4270                 return r;
4271
4272         /* requires IBs so do in late init after IB pool is initialized */
4273         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4274         if (r)
4275                 return r;
4276
4277         return 0;
4278 }
4279
4280 static int gfx_v8_0_set_powergating_state(void *handle,
4281                                           enum amd_powergating_state state)
4282 {
4283         return 0;
4284 }
4285
4286 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
4287                                      uint32_t reg_addr, uint32_t cmd)
4288 {
4289         uint32_t data;
4290
4291         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4292
4293         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
4294         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
4295
4296         data = RREG32(mmRLC_SERDES_WR_CTRL);
4297         if (adev->asic_type == CHIP_STONEY)
4298                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4299                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4300                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4301                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4302                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4303                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4304                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4305                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4306                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4307         else
4308                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
4309                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
4310                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
4311                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
4312                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
4313                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
4314                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
4315                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
4316                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
4317                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
4318                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
4319         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
4320                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
4321                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
4322                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
4323
4324         WREG32(mmRLC_SERDES_WR_CTRL, data);
4325 }
4326
/* message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE		1
#define MSG_EXIT_RLC_SAFE_MODE		0

/* RLC_GPR_REG2 layout as used here: REQ is bit 0, MESSAGE is bits 1-4 */
#define RLC_GPR_REG2__REQ_MASK		0x1
#define RLC_GPR_REG2__MESSAGE__SHIFT	0x1
#define RLC_GPR_REG2__MESSAGE_MASK	0x1e
4333
4334 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
4335 {
4336         u32 data = 0;
4337         unsigned i;
4338
4339         data = RREG32(mmRLC_CNTL);
4340         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
4341                 return;
4342
4343         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
4344             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
4345                                AMD_PG_SUPPORT_GFX_DMG))) {
4346                 data |= RLC_GPR_REG2__REQ_MASK;
4347                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
4348                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
4349                 WREG32(mmRLC_GPR_REG2, data);
4350
4351                 for (i = 0; i < adev->usec_timeout; i++) {
4352                         if ((RREG32(mmRLC_GPM_STAT) &
4353                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4354                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
4355                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4356                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
4357                                 break;
4358                         udelay(1);
4359                 }
4360
4361                 for (i = 0; i < adev->usec_timeout; i++) {
4362                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
4363                                 break;
4364                         udelay(1);
4365                 }
4366                 adev->gfx.rlc.in_safe_mode = true;
4367         }
4368 }
4369
4370 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
4371 {
4372         u32 data;
4373         unsigned i;
4374
4375         data = RREG32(mmRLC_CNTL);
4376         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
4377                 return;
4378
4379         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
4380             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
4381                                AMD_PG_SUPPORT_GFX_DMG))) {
4382                 data |= RLC_GPR_REG2__REQ_MASK;
4383                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
4384                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
4385                 WREG32(mmRLC_GPR_REG2, data);
4386                 adev->gfx.rlc.in_safe_mode = false;
4387         }
4388
4389         for (i = 0; i < adev->usec_timeout; i++) {
4390                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
4391                         break;
4392                 udelay(1);
4393         }
4394 }
4395
4396 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
4397 {
4398         u32 data;
4399         unsigned i;
4400
4401         data = RREG32(mmRLC_CNTL);
4402         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
4403                 return;
4404
4405         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
4406                 data |= RLC_SAFE_MODE__CMD_MASK;
4407                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
4408                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4409                 WREG32(mmRLC_SAFE_MODE, data);
4410
4411                 for (i = 0; i < adev->usec_timeout; i++) {
4412                         if ((RREG32(mmRLC_GPM_STAT) &
4413                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4414                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
4415                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
4416                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
4417                                 break;
4418                         udelay(1);
4419                 }
4420
4421                 for (i = 0; i < adev->usec_timeout; i++) {
4422                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
4423                                 break;
4424                         udelay(1);
4425                 }
4426                 adev->gfx.rlc.in_safe_mode = true;
4427         }
4428 }
4429
4430 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
4431 {
4432         u32 data = 0;
4433         unsigned i;
4434
4435         data = RREG32(mmRLC_CNTL);
4436         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
4437                 return;
4438
4439         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
4440                 if (adev->gfx.rlc.in_safe_mode) {
4441                         data |= RLC_SAFE_MODE__CMD_MASK;
4442                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
4443                         WREG32(mmRLC_SAFE_MODE, data);
4444                         adev->gfx.rlc.in_safe_mode = false;
4445                 }
4446         }
4447
4448         for (i = 0; i < adev->usec_timeout; i++) {
4449                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
4450                         break;
4451                 udelay(1);
4452         }
4453 }
4454
4455 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
4456 {
4457         adev->gfx.rlc.in_safe_mode = true;
4458 }
4459
4460 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
4461 {
4462         adev->gfx.rlc.in_safe_mode = false;
4463 }
4464
4465 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
4466         .enter_safe_mode = cz_enter_rlc_safe_mode,
4467         .exit_safe_mode = cz_exit_rlc_safe_mode
4468 };
4469
4470 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
4471         .enter_safe_mode = iceland_enter_rlc_safe_mode,
4472         .exit_safe_mode = iceland_exit_rlc_safe_mode
4473 };
4474
4475 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
4476         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
4477         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
4478 };
4479
4480 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4481                                                       bool enable)
4482 {
4483         uint32_t temp, data;
4484
4485         adev->gfx.rlc.funcs->enter_safe_mode(adev);
4486
4487         /* It is disabled by HW by default */
4488         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4489                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4490                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4491                                 /* 1 - RLC memory Light sleep */
4492                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
4493                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4494                                 if (temp != data)
4495                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4496                         }
4497
4498                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4499                                 /* 2 - CP memory Light sleep */
4500                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
4501                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4502                                 if (temp != data)
4503                                         WREG32(mmCP_MEM_SLP_CNTL, data);
4504                         }
4505                 }
4506
4507                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
4508                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4509                 if (adev->flags & AMD_IS_APU)
4510                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4511                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4512                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
4513                 else
4514                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4515                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4516                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4517                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4518
4519                 if (temp != data)
4520                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4521
4522                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4523                 gfx_v8_0_wait_for_rlc_serdes(adev);
4524
4525                 /* 5 - clear mgcg override */
4526                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4527
4528                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
4529                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
4530                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4531                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
4532                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
4533                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
4534                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
4535                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
4536                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
4537                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
4538                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
4539                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
4540                         if (temp != data)
4541                                 WREG32(mmCGTS_SM_CTRL_REG, data);
4542                 }
4543                 udelay(50);
4544
4545                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4546                 gfx_v8_0_wait_for_rlc_serdes(adev);
4547         } else {
4548                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
4549                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4550                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
4551                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
4552                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
4553                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
4554                 if (temp != data)
4555                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
4556
4557                 /* 2 - disable MGLS in RLC */
4558                 data = RREG32(mmRLC_MEM_SLP_CNTL);
4559                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4560                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4561                         WREG32(mmRLC_MEM_SLP_CNTL, data);
4562                 }
4563
4564                 /* 3 - disable MGLS in CP */
4565                 data = RREG32(mmCP_MEM_SLP_CNTL);
4566                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4567                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4568                         WREG32(mmCP_MEM_SLP_CNTL, data);
4569                 }
4570
4571                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
4572                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
4573                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
4574                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
4575                 if (temp != data)
4576                         WREG32(mmCGTS_SM_CTRL_REG, data);
4577
4578                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4579                 gfx_v8_0_wait_for_rlc_serdes(adev);
4580
4581                 /* 6 - set mgcg override */
4582                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4583
4584                 udelay(50);
4585
4586                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4587                 gfx_v8_0_wait_for_rlc_serdes(adev);
4588         }
4589
4590         adev->gfx.rlc.funcs->exit_safe_mode(adev);
4591 }
4592
4593 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4594                                                       bool enable)
4595 {
4596         uint32_t temp, temp1, data, data1;
4597
4598         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
4599
4600         adev->gfx.rlc.funcs->enter_safe_mode(adev);
4601
4602         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4603                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
4604                  * Cmp_busy/GFX_Idle interrupts
4605                  */
4606                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4607
4608                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4609                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
4610                 if (temp1 != data1)
4611                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4612
4613                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4614                 gfx_v8_0_wait_for_rlc_serdes(adev);
4615
4616                 /* 3 - clear cgcg override */
4617                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
4618
4619                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4620                 gfx_v8_0_wait_for_rlc_serdes(adev);
4621
4622                 /* 4 - write cmd to set CGLS */
4623                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
4624
4625                 /* 5 - enable cgcg */
4626                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4627
4628                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
4629                         /* enable cgls*/
4630                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4631
4632                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4633                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
4634
4635                         if (temp1 != data1)
4636                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4637                 } else {
4638                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4639                 }
4640
4641                 if (temp != data)
4642                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4643         } else {
4644                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
4645                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4646
4647                 /* TEST CGCG */
4648                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
4649                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
4650                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
4651                 if (temp1 != data1)
4652                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
4653
4654                 /* read gfx register to wake up cgcg */
4655                 RREG32(mmCB_CGTT_SCLK_CTRL);
4656                 RREG32(mmCB_CGTT_SCLK_CTRL);
4657                 RREG32(mmCB_CGTT_SCLK_CTRL);
4658                 RREG32(mmCB_CGTT_SCLK_CTRL);
4659
4660                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4661                 gfx_v8_0_wait_for_rlc_serdes(adev);
4662
4663                 /* write cmd to Set CGCG Overrride */
4664                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
4665
4666                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
4667                 gfx_v8_0_wait_for_rlc_serdes(adev);
4668
4669                 /* write cmd to Clear CGLS */
4670                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
4671
4672                 /* disable cgcg, cgls should be disabled too. */
4673                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4674                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4675                 if (temp != data)
4676                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
4677         }
4678
4679         adev->gfx.rlc.funcs->exit_safe_mode(adev);
4680 }
4681 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4682                                             bool enable)
4683 {
4684         if (enable) {
4685                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4686                  * ===  MGCG + MGLS + TS(CG/LS) ===
4687                  */
4688                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
4689                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
4690         } else {
4691                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4692                  * ===  CGCG + CGLS ===
4693                  */
4694                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
4695                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
4696         }
4697         return 0;
4698 }
4699
4700 static int gfx_v8_0_set_clockgating_state(void *handle,
4701                                           enum amd_clockgating_state state)
4702 {
4703         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4704
4705         switch (adev->asic_type) {
4706         case CHIP_FIJI:
4707         case CHIP_CARRIZO:
4708         case CHIP_STONEY:
4709                 gfx_v8_0_update_gfx_clock_gating(adev,
4710                                                  state == AMD_CG_STATE_GATE ? true : false);
4711                 break;
4712         default:
4713                 break;
4714         }
4715         return 0;
4716 }
4717
4718 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4719 {
4720         u32 rptr;
4721
4722         rptr = ring->adev->wb.wb[ring->rptr_offs];
4723
4724         return rptr;
4725 }
4726
4727 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4728 {
4729         struct amdgpu_device *adev = ring->adev;
4730         u32 wptr;
4731
4732         if (ring->use_doorbell)
4733                 /* XXX check if swapping is necessary on BE */
4734                 wptr = ring->adev->wb.wb[ring->wptr_offs];
4735         else
4736                 wptr = RREG32(mmCP_RB0_WPTR);
4737
4738         return wptr;
4739 }
4740
4741 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4742 {
4743         struct amdgpu_device *adev = ring->adev;
4744
4745         if (ring->use_doorbell) {
4746                 /* XXX check if swapping is necessary on BE */
4747                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4748                 WDOORBELL32(ring->doorbell_index, ring->wptr);
4749         } else {
4750                 WREG32(mmCP_RB0_WPTR, ring->wptr);
4751                 (void)RREG32(mmCP_RB0_WPTR);
4752         }
4753 }
4754
4755 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4756 {
4757         u32 ref_and_mask, reg_mem_engine;
4758
4759         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
4760                 switch (ring->me) {
4761                 case 1:
4762                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
4763                         break;
4764                 case 2:
4765                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
4766                         break;
4767                 default:
4768                         return;
4769                 }
4770                 reg_mem_engine = 0;
4771         } else {
4772                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
4773                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
4774         }
4775
4776         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4777         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
4778                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
4779                                  reg_mem_engine));
4780         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
4781         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
4782         amdgpu_ring_write(ring, ref_and_mask);
4783         amdgpu_ring_write(ring, ref_and_mask);
4784         amdgpu_ring_write(ring, 0x20); /* poll interval */
4785 }
4786
4787 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
4788 {
4789         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4790         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4791                                  WRITE_DATA_DST_SEL(0) |
4792                                  WR_CONFIRM));
4793         amdgpu_ring_write(ring, mmHDP_DEBUG0);
4794         amdgpu_ring_write(ring, 0);
4795         amdgpu_ring_write(ring, 1);
4796
4797 }
4798
4799 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4800                                   struct amdgpu_ib *ib)
4801 {
4802         bool need_ctx_switch = ring->current_ctx != ib->ctx;
4803         u32 header, control = 0;
4804         u32 next_rptr = ring->wptr + 5;
4805
4806         /* drop the CE preamble IB for the same context */
4807         if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
4808                 return;
4809
4810         if (need_ctx_switch)
4811                 next_rptr += 2;
4812
4813         next_rptr += 4;
4814         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4815         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4816         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4817         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4818         amdgpu_ring_write(ring, next_rptr);
4819
4820         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
4821         if (need_ctx_switch) {
4822                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4823                 amdgpu_ring_write(ring, 0);
4824         }
4825
4826         if (ib->flags & AMDGPU_IB_FLAG_CE)
4827                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4828         else
4829                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4830
4831         control |= ib->length_dw | (ib->vm_id << 24);
4832
4833         amdgpu_ring_write(ring, header);
4834         amdgpu_ring_write(ring,
4835 #ifdef __BIG_ENDIAN
4836                           (2 << 0) |
4837 #endif
4838                           (ib->gpu_addr & 0xFFFFFFFC));
4839         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4840         amdgpu_ring_write(ring, control);
4841 }
4842
4843 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4844                                   struct amdgpu_ib *ib)
4845 {
4846         u32 header, control = 0;
4847         u32 next_rptr = ring->wptr + 5;
4848
4849         control |= INDIRECT_BUFFER_VALID;
4850
4851         next_rptr += 4;
4852         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4853         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
4854         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4855         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4856         amdgpu_ring_write(ring, next_rptr);
4857
4858         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4859
4860         control |= ib->length_dw | (ib->vm_id << 24);
4861
4862         amdgpu_ring_write(ring, header);
4863         amdgpu_ring_write(ring,
4864 #ifdef __BIG_ENDIAN
4865                                           (2 << 0) |
4866 #endif
4867                                           (ib->gpu_addr & 0xFFFFFFFC));
4868         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4869         amdgpu_ring_write(ring, control);
4870 }
4871
4872 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
4873                                          u64 seq, unsigned flags)
4874 {
4875         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4876         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4877
4878         /* EVENT_WRITE_EOP - flush caches, send int */
4879         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
4880         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4881                                  EOP_TC_ACTION_EN |
4882                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4883                                  EVENT_INDEX(5)));
4884         amdgpu_ring_write(ring, addr & 0xfffffffc);
4885         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
4886                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4887         amdgpu_ring_write(ring, lower_32_bits(seq));
4888         amdgpu_ring_write(ring, upper_32_bits(seq));
4889
4890 }
4891
4892 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4893 {
4894         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4895         uint32_t seq = ring->fence_drv.sync_seq;
4896         uint64_t addr = ring->fence_drv.gpu_addr;
4897
4898         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4899         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
4900                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
4901                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
4902         amdgpu_ring_write(ring, addr & 0xfffffffc);
4903         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4904         amdgpu_ring_write(ring, seq);
4905         amdgpu_ring_write(ring, 0xffffffff);
4906         amdgpu_ring_write(ring, 4); /* poll interval */
4907
4908         if (usepfp) {
4909                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
4910                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4911                 amdgpu_ring_write(ring, 0);
4912                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4913                 amdgpu_ring_write(ring, 0);
4914         }
4915 }
4916
4917 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4918                                         unsigned vm_id, uint64_t pd_addr)
4919 {
4920         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
4921
4922         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4923         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
4924                                  WRITE_DATA_DST_SEL(0)) |
4925                                  WR_CONFIRM);
4926         if (vm_id < 8) {
4927                 amdgpu_ring_write(ring,
4928                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
4929         } else {
4930                 amdgpu_ring_write(ring,
4931                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
4932         }
4933         amdgpu_ring_write(ring, 0);
4934         amdgpu_ring_write(ring, pd_addr >> 12);
4935
4936         /* bits 0-15 are the VM contexts0-15 */
4937         /* invalidate the cache */
4938         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4939         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4940                                  WRITE_DATA_DST_SEL(0)));
4941         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4942         amdgpu_ring_write(ring, 0);
4943         amdgpu_ring_write(ring, 1 << vm_id);
4944
4945         /* wait for the invalidate to complete */
4946         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
4947         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
4948                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
4949                                  WAIT_REG_MEM_ENGINE(0))); /* me */
4950         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
4951         amdgpu_ring_write(ring, 0);
4952         amdgpu_ring_write(ring, 0); /* ref */
4953         amdgpu_ring_write(ring, 0); /* mask */
4954         amdgpu_ring_write(ring, 0x20); /* poll interval */
4955
4956         /* compute doesn't have PFP */
4957         if (usepfp) {
4958                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4959                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4960                 amdgpu_ring_write(ring, 0x0);
4961                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4962                 amdgpu_ring_write(ring, 0);
4963                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4964                 amdgpu_ring_write(ring, 0);
4965         }
4966 }
4967
4968 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4969 {
4970         return ring->adev->wb.wb[ring->rptr_offs];
4971 }
4972
4973 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4974 {
4975         return ring->adev->wb.wb[ring->wptr_offs];
4976 }
4977
4978 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4979 {
4980         struct amdgpu_device *adev = ring->adev;
4981
4982         /* XXX check if swapping is necessary on BE */
4983         adev->wb.wb[ring->wptr_offs] = ring->wptr;
4984         WDOORBELL32(ring->doorbell_index, ring->wptr);
4985 }
4986
4987 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
4988                                              u64 addr, u64 seq,
4989                                              unsigned flags)
4990 {
4991         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4992         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4993
4994         /* RELEASE_MEM - flush caches, send int */
4995         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4996         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
4997                                  EOP_TC_ACTION_EN |
4998                                  EOP_TC_WB_ACTION_EN |
4999                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5000                                  EVENT_INDEX(5)));
5001         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5002         amdgpu_ring_write(ring, addr & 0xfffffffc);
5003         amdgpu_ring_write(ring, upper_32_bits(addr));
5004         amdgpu_ring_write(ring, lower_32_bits(seq));
5005         amdgpu_ring_write(ring, upper_32_bits(seq));
5006 }
5007
5008 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5009                                                  enum amdgpu_interrupt_state state)
5010 {
5011         u32 cp_int_cntl;
5012
5013         switch (state) {
5014         case AMDGPU_IRQ_STATE_DISABLE:
5015                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5016                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5017                                             TIME_STAMP_INT_ENABLE, 0);
5018                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5019                 break;
5020         case AMDGPU_IRQ_STATE_ENABLE:
5021                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5022                 cp_int_cntl =
5023                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5024                                       TIME_STAMP_INT_ENABLE, 1);
5025                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5026                 break;
5027         default:
5028                 break;
5029         }
5030 }
5031
5032 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5033                                                      int me, int pipe,
5034                                                      enum amdgpu_interrupt_state state)
5035 {
5036         u32 mec_int_cntl, mec_int_cntl_reg;
5037
5038         /*
5039          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5040          * handles the setting of interrupts for this specific pipe. All other
5041          * pipes' interrupts are set by amdkfd.
5042          */
5043
5044         if (me == 1) {
5045                 switch (pipe) {
5046                 case 0:
5047                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5048                         break;
5049                 default:
5050                         DRM_DEBUG("invalid pipe %d\n", pipe);
5051                         return;
5052                 }
5053         } else {
5054                 DRM_DEBUG("invalid me %d\n", me);
5055                 return;
5056         }
5057
5058         switch (state) {
5059         case AMDGPU_IRQ_STATE_DISABLE:
5060                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5061                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5062                                              TIME_STAMP_INT_ENABLE, 0);
5063                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5064                 break;
5065         case AMDGPU_IRQ_STATE_ENABLE:
5066                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5067                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5068                                              TIME_STAMP_INT_ENABLE, 1);
5069                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5070                 break;
5071         default:
5072                 break;
5073         }
5074 }
5075
5076 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5077                                              struct amdgpu_irq_src *source,
5078                                              unsigned type,
5079                                              enum amdgpu_interrupt_state state)
5080 {
5081         u32 cp_int_cntl;
5082
5083         switch (state) {
5084         case AMDGPU_IRQ_STATE_DISABLE:
5085                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5086                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5087                                             PRIV_REG_INT_ENABLE, 0);
5088                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5089                 break;
5090         case AMDGPU_IRQ_STATE_ENABLE:
5091                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5092                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5093                                             PRIV_REG_INT_ENABLE, 1);
5094                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5095                 break;
5096         default:
5097                 break;
5098         }
5099
5100         return 0;
5101 }
5102
5103 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5104                                               struct amdgpu_irq_src *source,
5105                                               unsigned type,
5106                                               enum amdgpu_interrupt_state state)
5107 {
5108         u32 cp_int_cntl;
5109
5110         switch (state) {
5111         case AMDGPU_IRQ_STATE_DISABLE:
5112                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5113                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5114                                             PRIV_INSTR_INT_ENABLE, 0);
5115                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5116                 break;
5117         case AMDGPU_IRQ_STATE_ENABLE:
5118                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5119                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5120                                             PRIV_INSTR_INT_ENABLE, 1);
5121                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5122                 break;
5123         default:
5124                 break;
5125         }
5126
5127         return 0;
5128 }
5129
5130 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5131                                             struct amdgpu_irq_src *src,
5132                                             unsigned type,
5133                                             enum amdgpu_interrupt_state state)
5134 {
5135         switch (type) {
5136         case AMDGPU_CP_IRQ_GFX_EOP:
5137                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5138                 break;
5139         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5140                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5141                 break;
5142         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5143                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5144                 break;
5145         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5146                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5147                 break;
5148         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5149                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5150                 break;
5151         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5152                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5153                 break;
5154         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5155                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5156                 break;
5157         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5158                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5159                 break;
5160         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5161                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5162                 break;
5163         default:
5164                 break;
5165         }
5166         return 0;
5167 }
5168
5169 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5170                             struct amdgpu_irq_src *source,
5171                             struct amdgpu_iv_entry *entry)
5172 {
5173         int i;
5174         u8 me_id, pipe_id, queue_id;
5175         struct amdgpu_ring *ring;
5176
5177         DRM_DEBUG("IH: CP EOP\n");
5178         me_id = (entry->ring_id & 0x0c) >> 2;
5179         pipe_id = (entry->ring_id & 0x03) >> 0;
5180         queue_id = (entry->ring_id & 0x70) >> 4;
5181
5182         switch (me_id) {
5183         case 0:
5184                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5185                 break;
5186         case 1:
5187         case 2:
5188                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5189                         ring = &adev->gfx.compute_ring[i];
5190                         /* Per-queue interrupt is supported for MEC starting from VI.
5191                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
5192                           */
5193                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5194                                 amdgpu_fence_process(ring);
5195                 }
5196                 break;
5197         }
5198         return 0;
5199 }
5200
5201 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
5202                                  struct amdgpu_irq_src *source,
5203                                  struct amdgpu_iv_entry *entry)
5204 {
5205         DRM_ERROR("Illegal register access in command stream\n");
5206         schedule_work(&adev->reset_work);
5207         return 0;
5208 }
5209
5210 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
5211                                   struct amdgpu_irq_src *source,
5212                                   struct amdgpu_iv_entry *entry)
5213 {
5214         DRM_ERROR("Illegal instruction in command stream\n");
5215         schedule_work(&adev->reset_work);
5216         return 0;
5217 }
5218
5219 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
5220         .early_init = gfx_v8_0_early_init,
5221         .late_init = gfx_v8_0_late_init,
5222         .sw_init = gfx_v8_0_sw_init,
5223         .sw_fini = gfx_v8_0_sw_fini,
5224         .hw_init = gfx_v8_0_hw_init,
5225         .hw_fini = gfx_v8_0_hw_fini,
5226         .suspend = gfx_v8_0_suspend,
5227         .resume = gfx_v8_0_resume,
5228         .is_idle = gfx_v8_0_is_idle,
5229         .wait_for_idle = gfx_v8_0_wait_for_idle,
5230         .soft_reset = gfx_v8_0_soft_reset,
5231         .print_status = gfx_v8_0_print_status,
5232         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5233         .set_powergating_state = gfx_v8_0_set_powergating_state,
5234 };
5235
5236 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5237         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5238         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5239         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5240         .parse_cs = NULL,
5241         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
5242         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
5243         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5244         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5245         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5246         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5247         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5248         .test_ring = gfx_v8_0_ring_test_ring,
5249         .test_ib = gfx_v8_0_ring_test_ib,
5250         .insert_nop = amdgpu_ring_insert_nop,
5251         .pad_ib = amdgpu_ring_generic_pad_ib,
5252 };
5253
5254 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5255         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5256         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5257         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5258         .parse_cs = NULL,
5259         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
5260         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
5261         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
5262         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5263         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
5264         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
5265         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
5266         .test_ring = gfx_v8_0_ring_test_ring,
5267         .test_ib = gfx_v8_0_ring_test_ib,
5268         .insert_nop = amdgpu_ring_insert_nop,
5269         .pad_ib = amdgpu_ring_generic_pad_ib,
5270 };
5271
5272 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5273 {
5274         int i;
5275
5276         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5277                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5278
5279         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5280                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5281 }
5282
5283 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
5284         .set = gfx_v8_0_set_eop_interrupt_state,
5285         .process = gfx_v8_0_eop_irq,
5286 };
5287
5288 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
5289         .set = gfx_v8_0_set_priv_reg_fault_state,
5290         .process = gfx_v8_0_priv_reg_irq,
5291 };
5292
5293 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
5294         .set = gfx_v8_0_set_priv_inst_fault_state,
5295         .process = gfx_v8_0_priv_inst_irq,
5296 };
5297
5298 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5299 {
5300         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5301         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5302
5303         adev->gfx.priv_reg_irq.num_types = 1;
5304         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5305
5306         adev->gfx.priv_inst_irq.num_types = 1;
5307         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5308 }
5309
5310 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
5311 {
5312         switch (adev->asic_type) {
5313         case CHIP_TOPAZ:
5314         case CHIP_STONEY:
5315                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
5316                 break;
5317         case CHIP_CARRIZO:
5318                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
5319                 break;
5320         default:
5321                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
5322                 break;
5323         }
5324 }
5325
5326 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5327 {
5328         /* init asci gds info */
5329         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5330         adev->gds.gws.total_size = 64;
5331         adev->gds.oa.total_size = 16;
5332
5333         if (adev->gds.mem.total_size == 64 * 1024) {
5334                 adev->gds.mem.gfx_partition_size = 4096;
5335                 adev->gds.mem.cs_partition_size = 4096;
5336
5337                 adev->gds.gws.gfx_partition_size = 4;
5338                 adev->gds.gws.cs_partition_size = 4;
5339
5340                 adev->gds.oa.gfx_partition_size = 4;
5341                 adev->gds.oa.cs_partition_size = 1;
5342         } else {
5343                 adev->gds.mem.gfx_partition_size = 1024;
5344                 adev->gds.mem.cs_partition_size = 1024;
5345
5346                 adev->gds.gws.gfx_partition_size = 16;
5347                 adev->gds.gws.cs_partition_size = 16;
5348
5349                 adev->gds.oa.gfx_partition_size = 4;
5350                 adev->gds.oa.cs_partition_size = 4;
5351         }
5352 }
5353
5354 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5355 {
5356         u32 data, mask;
5357
5358         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5359         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5360
5361         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5362         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5363
5364         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5365
5366         return (~data) & mask;
5367 }
5368
5369 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5370                          struct amdgpu_cu_info *cu_info)
5371 {
5372         int i, j, k, counter, active_cu_number = 0;
5373         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5374
5375         if (!adev || !cu_info)
5376                 return -EINVAL;
5377
5378         memset(cu_info, 0, sizeof(*cu_info));
5379
5380         mutex_lock(&adev->grbm_idx_mutex);
5381         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5382                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5383                         mask = 1;
5384                         ao_bitmap = 0;
5385                         counter = 0;
5386                         gfx_v8_0_select_se_sh(adev, i, j);
5387                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5388                         cu_info->bitmap[i][j] = bitmap;
5389
5390                         for (k = 0; k < 16; k ++) {
5391                                 if (bitmap & mask) {
5392                                         if (counter < 2)
5393                                                 ao_bitmap |= mask;
5394                                         counter ++;
5395                                 }
5396                                 mask <<= 1;
5397                         }
5398                         active_cu_number += counter;
5399                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5400                 }
5401         }
5402         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5403         mutex_unlock(&adev->grbm_idx_mutex);
5404
5405         cu_info->number = active_cu_number;
5406         cu_info->ao_cu_mask = ao_cu_mask;
5407
5408         return 0;
5409 }