drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
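/*
 * RLC_CGTT_MGCG_OVERRIDE field masks, used by the clock-gating update
 * paths later in this file when setting or clearing CGCG/MGCG overrides.
 */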
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM Register Address */
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX
90 };
91
92 #define RLC_FormatDirectRegListLength        14
93
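/*
 * Firmware images fetched by gfx_v8_0_init_microcode(); the
 * MODULE_FIRMWARE() tags below record them in the module info so that
 * initramfs tooling can bundle the files the driver will request.
 */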
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
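/*
 * Per-VMID GDS register offsets ({base, size, GWS, OA} for VMIDs 0-15),
 * used elsewhere in this file when switching the global data share
 * partition for a VMID.
 */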
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
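/*
 * The "golden" tables below are {register, AND mask, OR value} triplets
 * consumed by amdgpu_program_register_sequence(): an AND mask of
 * 0xffffffff writes the value directly, anything else does a
 * read-modify-write that replaces only the masked bits.  For example,
 * { mmDB_DEBUG2, 0xf00fffff, 0x00000400 } below clears the bits covered
 * by 0xf00fffff and ORs in 0x00000400.  Each ASIC gets a clock-gating
 * init table, revision-specific tuning values and a common table.
 */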
161 static const u32 golden_settings_tonga_a11[] =
162 {
163         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166         mmGB_GPU_ID, 0x0000000f, 0x00000000,
167         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
180 static const u32 tonga_golden_common_all[] =
181 {
182         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
274         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
275         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
276         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
277         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
278         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
279         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
280         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
281         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
282         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
283         mmSQ_CONFIG, 0x07f80000, 0x01180000,
284         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
285         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
286         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
287         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
288         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
289         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
290 };
291
292 static const u32 polaris11_golden_common_all[] =
293 {
294         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
296         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
297         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
298         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
299         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
300 };
301
302 static const u32 golden_settings_polaris10_a11[] =
303 {
304         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
305         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
306         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
307         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
308         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
309         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
310         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
311         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
312         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
313         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
314         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
315         mmSQ_CONFIG, 0x07f80000, 0x07180000,
316         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
317         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
318         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
319         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
320         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
321 };
322
323 static const u32 polaris10_golden_common_all[] =
324 {
325         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
326         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
327         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
333 };
334
335 static const u32 fiji_golden_common_all[] =
336 {
337         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
338         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
339         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
340         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
341         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
342         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
343         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
344         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
345         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
346         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
347 };
348
349 static const u32 golden_settings_fiji_a10[] =
350 {
351         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
352         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
353         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
354         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
355         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
356         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
357         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
358         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
359         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
360         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
361         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
362 };
363
364 static const u32 fiji_mgcg_cgcg_init[] =
365 {
366         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
367         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
368         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
372         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
373         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
374         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
375         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
376         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
377         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
384         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
385         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
386         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
387         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
388         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
390         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
391         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
392         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
393         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
395         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
396         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
397         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
398         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
399         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
400         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
401 };
402
403 static const u32 golden_settings_iceland_a11[] =
404 {
405         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
406         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
407         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
408         mmGB_GPU_ID, 0x0000000f, 0x00000000,
409         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
410         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
411         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
412         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
413         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
414         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
415         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
416         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
417         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
418         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
419         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
420         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
421 };
422
423 static const u32 iceland_golden_common_all[] =
424 {
425         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
426         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
427         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
428         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
429         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
430         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
431         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
432         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
433 };
434
435 static const u32 iceland_mgcg_cgcg_init[] =
436 {
437         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
438         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
439         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
441         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
442         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
443         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
444         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
446         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
448         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
456         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
457         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
458         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
459         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
460         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
461         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
462         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
463         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
464         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
466         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
467         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
468         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
469         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
470         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
471         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
472         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
473         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
474         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
475         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
476         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
477         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
478         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
479         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
480         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
481         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
482         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
483         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
484         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
485         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
486         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
487         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
488         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
489         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
490         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
491         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
492         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
493         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
494         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
495         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
496         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
497         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
498         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
499         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
500         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
501 };
502
503 static const u32 cz_golden_settings_a11[] =
504 {
505         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
506         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
507         mmGB_GPU_ID, 0x0000000f, 0x00000000,
508         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
509         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
510         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
511         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
512         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
513         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
514         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
515         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
516         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
517 };
518
519 static const u32 cz_golden_common_all[] =
520 {
521         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
522         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
523         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
524         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
525         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
526         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
527         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
528         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
529 };
530
531 static const u32 cz_mgcg_cgcg_init[] =
532 {
533         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
534         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
535         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
538         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
540         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
542         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
544         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
547         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
548         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
549         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
550         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
551         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
552         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
553         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
554         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
555         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
556         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
557         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
558         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
559         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
560         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
562         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
563         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
564         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
565         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
566         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
567         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
568         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
569         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
570         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
571         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
572         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
573         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
574         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
575         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
576         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
577         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
578         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
579         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
580         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
581         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
582         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
583         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
584         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
585         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
586         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
587         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
588         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
589         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
590         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
591         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
592         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
593         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
594         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
595         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
596         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
597         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
598         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
599         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
600         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
601         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
602         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
603         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
604         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
605         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
606         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
607         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
608 };
609
610 static const u32 stoney_golden_settings_a11[] =
611 {
612         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
613         mmGB_GPU_ID, 0x0000000f, 0x00000000,
614         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
615         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
616         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
617         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
618         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
619         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
620         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
621         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
622 };
623
624 static const u32 stoney_golden_common_all[] =
625 {
626         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
627         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
628         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
629         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
630         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
631         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
632         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
633         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
634 };
635
636 static const u32 stoney_mgcg_cgcg_init[] =
637 {
638         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
639         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
640         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
642         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
643         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
644 };
645
646 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
649 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
650 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
651 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
652
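/*
 * Apply the per-ASIC golden register sequences above (clock-gating init,
 * revision tuning and common settings).  Polaris10 additionally programs
 * CG_ACLK_CNTL through the SMC and, on revision 0xc7 boards, issues two
 * atombios I2C transactions.
 */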
653 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
654 {
655         switch (adev->asic_type) {
656         case CHIP_TOPAZ:
657                 amdgpu_program_register_sequence(adev,
658                                                  iceland_mgcg_cgcg_init,
659                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
660                 amdgpu_program_register_sequence(adev,
661                                                  golden_settings_iceland_a11,
662                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
663                 amdgpu_program_register_sequence(adev,
664                                                  iceland_golden_common_all,
665                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
666                 break;
667         case CHIP_FIJI:
668                 amdgpu_program_register_sequence(adev,
669                                                  fiji_mgcg_cgcg_init,
670                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
671                 amdgpu_program_register_sequence(adev,
672                                                  golden_settings_fiji_a10,
673                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
674                 amdgpu_program_register_sequence(adev,
675                                                  fiji_golden_common_all,
676                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
677                 break;
678
679         case CHIP_TONGA:
680                 amdgpu_program_register_sequence(adev,
681                                                  tonga_mgcg_cgcg_init,
682                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
683                 amdgpu_program_register_sequence(adev,
684                                                  golden_settings_tonga_a11,
685                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
686                 amdgpu_program_register_sequence(adev,
687                                                  tonga_golden_common_all,
688                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
689                 break;
690         case CHIP_POLARIS11:
691                 amdgpu_program_register_sequence(adev,
692                                                  golden_settings_polaris11_a11,
693                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
694                 amdgpu_program_register_sequence(adev,
695                                                  polaris11_golden_common_all,
696                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
697                 break;
698         case CHIP_POLARIS10:
699                 amdgpu_program_register_sequence(adev,
700                                                  golden_settings_polaris10_a11,
701                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
702                 amdgpu_program_register_sequence(adev,
703                                                  polaris10_golden_common_all,
704                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
705                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
706                 if (adev->pdev->revision == 0xc7) {
707                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
708                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
709                 }
710                 break;
711         case CHIP_CARRIZO:
712                 amdgpu_program_register_sequence(adev,
713                                                  cz_mgcg_cgcg_init,
714                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
715                 amdgpu_program_register_sequence(adev,
716                                                  cz_golden_settings_a11,
717                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
718                 amdgpu_program_register_sequence(adev,
719                                                  cz_golden_common_all,
720                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
721                 break;
722         case CHIP_STONEY:
723                 amdgpu_program_register_sequence(adev,
724                                                  stoney_mgcg_cgcg_init,
725                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
726                 amdgpu_program_register_sequence(adev,
727                                                  stoney_golden_settings_a11,
728                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
729                 amdgpu_program_register_sequence(adev,
730                                                  stoney_golden_common_all,
731                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
732                 break;
733         default:
734                 break;
735         }
736 }
737
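/* Expose SCRATCH_REG0..SCRATCH_REG6 through the generic gfx scratch allocator. */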
738 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
739 {
740         int i;
741
742         adev->gfx.scratch.num_reg = 7;
743         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
744         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
745                 adev->gfx.scratch.free[i] = true;
746                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
747         }
748 }
749
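/*
 * Basic ring sanity test: seed a scratch register with 0xCAFEDEAD, emit a
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, then poll until the
 * value lands or adev->usec_timeout microseconds elapse.
 */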
750 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
751 {
752         struct amdgpu_device *adev = ring->adev;
753         uint32_t scratch;
754         uint32_t tmp = 0;
755         unsigned i;
756         int r;
757
758         r = amdgpu_gfx_scratch_get(adev, &scratch);
759         if (r) {
760                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
761                 return r;
762         }
763         WREG32(scratch, 0xCAFEDEAD);
764         r = amdgpu_ring_alloc(ring, 3);
765         if (r) {
766                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
767                           ring->idx, r);
768                 amdgpu_gfx_scratch_free(adev, scratch);
769                 return r;
770         }
771         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
772         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
773         amdgpu_ring_write(ring, 0xDEADBEEF);
774         amdgpu_ring_commit(ring);
775
776         for (i = 0; i < adev->usec_timeout; i++) {
777                 tmp = RREG32(scratch);
778                 if (tmp == 0xDEADBEEF)
779                         break;
780                 DRM_UDELAY(1);
781         }
782         if (i < adev->usec_timeout) {
783                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
784                          ring->idx, i);
785         } else {
786                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
787                           ring->idx, scratch, tmp);
788                 r = -EINVAL;
789         }
790         amdgpu_gfx_scratch_free(adev, scratch);
791         return r;
792 }
793
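/*
 * Indirect-buffer test: same scratch-register handshake as the ring test,
 * but the SET_UCONFIG_REG write is submitted through an IB and completion
 * is detected by waiting on the returned fence before checking the value.
 */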
794 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
795 {
796         struct amdgpu_device *adev = ring->adev;
797         struct amdgpu_ib ib;
798         struct fence *f = NULL;
799         uint32_t scratch;
800         uint32_t tmp = 0;
801         long r;
802
803         r = amdgpu_gfx_scratch_get(adev, &scratch);
804         if (r) {
805                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
806                 return r;
807         }
808         WREG32(scratch, 0xCAFEDEAD);
809         memset(&ib, 0, sizeof(ib));
810         r = amdgpu_ib_get(adev, NULL, 256, &ib);
811         if (r) {
812                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
813                 goto err1;
814         }
815         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
816         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
817         ib.ptr[2] = 0xDEADBEEF;
818         ib.length_dw = 3;
819
820         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
821         if (r)
822                 goto err2;
823
824         r = fence_wait_timeout(f, false, timeout);
825         if (r == 0) {
826                 DRM_ERROR("amdgpu: IB test timed out.\n");
827                 r = -ETIMEDOUT;
828                 goto err2;
829         } else if (r < 0) {
830                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
831                 goto err2;
832         }
833         tmp = RREG32(scratch);
834         if (tmp == 0xDEADBEEF) {
835                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
836                 r = 0;
837         } else {
838                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
839                           scratch, tmp);
840                 r = -EINVAL;
841         }
842 err2:
843         amdgpu_ib_free(adev, &ib, NULL);
844         fence_put(f);
845 err1:
846         amdgpu_gfx_scratch_free(adev, scratch);
847         return r;
848 }
849
850
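/*
 * Drop the firmware references taken in gfx_v8_0_init_microcode() and free
 * the RLC register-list copy (Stoney and Topaz never load a MEC2 image).
 */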
851 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
852         release_firmware(adev->gfx.pfp_fw);
853         adev->gfx.pfp_fw = NULL;
854         release_firmware(adev->gfx.me_fw);
855         adev->gfx.me_fw = NULL;
856         release_firmware(adev->gfx.ce_fw);
857         adev->gfx.ce_fw = NULL;
858         release_firmware(adev->gfx.rlc_fw);
859         adev->gfx.rlc_fw = NULL;
860         release_firmware(adev->gfx.mec_fw);
861         adev->gfx.mec_fw = NULL;
862         if ((adev->asic_type != CHIP_STONEY) &&
863             (adev->asic_type != CHIP_TOPAZ))
864                 release_firmware(adev->gfx.mec2_fw);
865         adev->gfx.mec2_fw = NULL;
866
867         kfree(adev->gfx.rlc.register_list_format);
868 }
869
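/*
 * Fetch and validate the PFP, ME, CE, RLC and MEC (plus optional MEC2)
 * microcode for this ASIC, cache the version/feature numbers and the RLC
 * register-list data, and, when firmware is loaded through the SMU,
 * register each image with the SMU ucode table.
 */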
870 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
871 {
872         const char *chip_name;
873         char fw_name[30];
874         int err;
875         struct amdgpu_firmware_info *info = NULL;
876         const struct common_firmware_header *header = NULL;
877         const struct gfx_firmware_header_v1_0 *cp_hdr;
878         const struct rlc_firmware_header_v2_0 *rlc_hdr;
879         unsigned int *tmp = NULL, i;
880
881         DRM_DEBUG("\n");
882
883         switch (adev->asic_type) {
884         case CHIP_TOPAZ:
885                 chip_name = "topaz";
886                 break;
887         case CHIP_TONGA:
888                 chip_name = "tonga";
889                 break;
890         case CHIP_CARRIZO:
891                 chip_name = "carrizo";
892                 break;
893         case CHIP_FIJI:
894                 chip_name = "fiji";
895                 break;
896         case CHIP_POLARIS11:
897                 chip_name = "polaris11";
898                 break;
899         case CHIP_POLARIS10:
900                 chip_name = "polaris10";
901                 break;
902         case CHIP_STONEY:
903                 chip_name = "stoney";
904                 break;
905         default:
906                 BUG();
907         }
908
909         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
910         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
911         if (err)
912                 goto out;
913         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
914         if (err)
915                 goto out;
916         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
917         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
918         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
919
920         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
921         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
922         if (err)
923                 goto out;
924         err = amdgpu_ucode_validate(adev->gfx.me_fw);
925         if (err)
926                 goto out;
927         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
928         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
929         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
930
931         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
932         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
933         if (err)
934                 goto out;
935         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
936         if (err)
937                 goto out;
938         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
939         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
940         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
941
942         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
943         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
944         if (err)
945                 goto out;
946         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
            if (err)
                    goto out;
947         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
948         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
949         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
950
951         adev->gfx.rlc.save_and_restore_offset =
952                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
953         adev->gfx.rlc.clear_state_descriptor_offset =
954                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
955         adev->gfx.rlc.avail_scratch_ram_locations =
956                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
957         adev->gfx.rlc.reg_restore_list_size =
958                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
959         adev->gfx.rlc.reg_list_format_start =
960                         le32_to_cpu(rlc_hdr->reg_list_format_start);
961         adev->gfx.rlc.reg_list_format_separate_start =
962                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
963         adev->gfx.rlc.starting_offsets_start =
964                         le32_to_cpu(rlc_hdr->starting_offsets_start);
965         adev->gfx.rlc.reg_list_format_size_bytes =
966                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
967         adev->gfx.rlc.reg_list_size_bytes =
968                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
969
970         adev->gfx.rlc.register_list_format =
971                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
972                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
973
974         if (!adev->gfx.rlc.register_list_format) {
975                 err = -ENOMEM;
976                 goto out;
977         }
978
979         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
980                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
981         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
982                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
983
984         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
985
986         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
987                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
988         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
989                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
990
991         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
992         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
993         if (err)
994                 goto out;
995         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
996         if (err)
997                 goto out;
998         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
999         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1000         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1001
1002         if ((adev->asic_type != CHIP_STONEY) &&
1003             (adev->asic_type != CHIP_TOPAZ)) {
1004                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1005                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1006                 if (!err) {
1007                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1008                         if (err)
1009                                 goto out;
1010                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1011                                 adev->gfx.mec2_fw->data;
1012                         adev->gfx.mec2_fw_version =
1013                                 le32_to_cpu(cp_hdr->header.ucode_version);
1014                         adev->gfx.mec2_feature_version =
1015                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1016                 } else {
1017                         err = 0;
1018                         adev->gfx.mec2_fw = NULL;
1019                 }
1020         }
1021
1022         if (adev->firmware.smu_load) {
1023                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1024                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1025                 info->fw = adev->gfx.pfp_fw;
1026                 header = (const struct common_firmware_header *)info->fw->data;
1027                 adev->firmware.fw_size +=
1028                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029
1030                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1031                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1032                 info->fw = adev->gfx.me_fw;
1033                 header = (const struct common_firmware_header *)info->fw->data;
1034                 adev->firmware.fw_size +=
1035                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036
1037                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1038                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1039                 info->fw = adev->gfx.ce_fw;
1040                 header = (const struct common_firmware_header *)info->fw->data;
1041                 adev->firmware.fw_size +=
1042                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1045                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1046                 info->fw = adev->gfx.rlc_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1052                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1053                 info->fw = adev->gfx.mec_fw;
1054                 header = (const struct common_firmware_header *)info->fw->data;
1055                 adev->firmware.fw_size +=
1056                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                 if (adev->gfx.mec2_fw) {
1059                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1060                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1061                         info->fw = adev->gfx.mec2_fw;
1062                         header = (const struct common_firmware_header *)info->fw->data;
1063                         adev->firmware.fw_size +=
1064                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065                 }
1066
1067         }
1068
1069 out:
1070         if (err) {
1071                 dev_err(adev->dev,
1072                         "gfx8: Failed to load firmware \"%s\"\n",
1073                         fw_name);
1074                 release_firmware(adev->gfx.pfp_fw);
1075                 adev->gfx.pfp_fw = NULL;
1076                 release_firmware(adev->gfx.me_fw);
1077                 adev->gfx.me_fw = NULL;
1078                 release_firmware(adev->gfx.ce_fw);
1079                 adev->gfx.ce_fw = NULL;
1080                 release_firmware(adev->gfx.rlc_fw);
1081                 adev->gfx.rlc_fw = NULL;
1082                 release_firmware(adev->gfx.mec_fw);
1083                 adev->gfx.mec_fw = NULL;
1084                 release_firmware(adev->gfx.mec2_fw);
1085                 adev->gfx.mec2_fw = NULL;
1086         }
1087         return err;
1088 }
1089
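/*
 * Build the clear-state indirect buffer from the RLC cs_data: preamble
 * begin, context control, every SECT_CONTEXT extent, an ASIC-specific
 * PA_SC_RASTER_CONFIG pair, preamble end and a trailing CLEAR_STATE packet.
 */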
1090 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1091                                     volatile u32 *buffer)
1092 {
1093         u32 count = 0, i;
1094         const struct cs_section_def *sect = NULL;
1095         const struct cs_extent_def *ext = NULL;
1096
1097         if (adev->gfx.rlc.cs_data == NULL)
1098                 return;
1099         if (buffer == NULL)
1100                 return;
1101
1102         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1103         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1104
1105         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1106         buffer[count++] = cpu_to_le32(0x80000000);
1107         buffer[count++] = cpu_to_le32(0x80000000);
1108
1109         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1110                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1111                         if (sect->id == SECT_CONTEXT) {
1112                                 buffer[count++] =
1113                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1114                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1115                                                 PACKET3_SET_CONTEXT_REG_START);
1116                                 for (i = 0; i < ext->reg_count; i++)
1117                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1118                         } else {
1119                                 return;
1120                         }
1121                 }
1122         }
1123
1124         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1125         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1126                         PACKET3_SET_CONTEXT_REG_START);
1127         switch (adev->asic_type) {
1128         case CHIP_TONGA:
1129         case CHIP_POLARIS10:
1130                 buffer[count++] = cpu_to_le32(0x16000012);
1131                 buffer[count++] = cpu_to_le32(0x0000002A);
1132                 break;
1133         case CHIP_POLARIS11:
1134                 buffer[count++] = cpu_to_le32(0x16000012);
1135                 buffer[count++] = cpu_to_le32(0x00000000);
1136                 break;
1137         case CHIP_FIJI:
1138                 buffer[count++] = cpu_to_le32(0x3a00161a);
1139                 buffer[count++] = cpu_to_le32(0x0000002e);
1140                 break;
1141         case CHIP_TOPAZ:
1142         case CHIP_CARRIZO:
1143                 buffer[count++] = cpu_to_le32(0x00000002);
1144                 buffer[count++] = cpu_to_le32(0x00000000);
1145                 break;
1146         case CHIP_STONEY:
1147                 buffer[count++] = cpu_to_le32(0x00000000);
1148                 buffer[count++] = cpu_to_le32(0x00000000);
1149                 break;
1150         default:
1151                 buffer[count++] = cpu_to_le32(0x00000000);
1152                 buffer[count++] = cpu_to_le32(0x00000000);
1153                 break;
1154         }
1155
1156         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1157         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1158
1159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1160         buffer[count++] = cpu_to_le32(0);
1161 }
1162
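/*
 * cz_init_cp_jump_table() copies the CP jump tables out of the CE, PFP,
 * ME, MEC and (on Carrizo) MEC2 firmware images into the RLC cp_table
 * BO.  Each gfx firmware header provides the table location (jt_offset)
 * and length (jt_size) in dwords; the tables are packed back to back,
 * with bo_offset tracking the running position in the destination.
 */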
1163 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1164 {
1165         const __le32 *fw_data;
1166         volatile u32 *dst_ptr;
1167         int me, i, max_me = 4;
1168         u32 bo_offset = 0;
1169         u32 table_offset, table_size;
1170
1171         if (adev->asic_type == CHIP_CARRIZO)
1172                 max_me = 5;
1173
1174         /* write the cp table buffer */
1175         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1176         for (me = 0; me < max_me; me++) {
1177                 if (me == 0) {
1178                         const struct gfx_firmware_header_v1_0 *hdr =
1179                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1180                         fw_data = (const __le32 *)
1181                                 (adev->gfx.ce_fw->data +
1182                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1183                         table_offset = le32_to_cpu(hdr->jt_offset);
1184                         table_size = le32_to_cpu(hdr->jt_size);
1185                 } else if (me == 1) {
1186                         const struct gfx_firmware_header_v1_0 *hdr =
1187                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1188                         fw_data = (const __le32 *)
1189                                 (adev->gfx.pfp_fw->data +
1190                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1191                         table_offset = le32_to_cpu(hdr->jt_offset);
1192                         table_size = le32_to_cpu(hdr->jt_size);
1193                 } else if (me == 2) {
1194                         const struct gfx_firmware_header_v1_0 *hdr =
1195                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1196                         fw_data = (const __le32 *)
1197                                 (adev->gfx.me_fw->data +
1198                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1199                         table_offset = le32_to_cpu(hdr->jt_offset);
1200                         table_size = le32_to_cpu(hdr->jt_size);
1201                 } else if (me == 3) {
1202                         const struct gfx_firmware_header_v1_0 *hdr =
1203                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1204                         fw_data = (const __le32 *)
1205                                 (adev->gfx.mec_fw->data +
1206                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1207                         table_offset = le32_to_cpu(hdr->jt_offset);
1208                         table_size = le32_to_cpu(hdr->jt_size);
1209                 } else if (me == 4) {
1210                         const struct gfx_firmware_header_v1_0 *hdr =
1211                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1212                         fw_data = (const __le32 *)
1213                                 (adev->gfx.mec2_fw->data +
1214                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1215                         table_offset = le32_to_cpu(hdr->jt_offset);
1216                         table_size = le32_to_cpu(hdr->jt_size);
1217                 }
1218
1219                 for (i = 0; i < table_size; i++) {
1220                         dst_ptr[bo_offset + i] =
1221                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1222                 }
1223
1224                 bo_offset += table_size;
1225         }
1226 }
1227
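/*
 * Tear down the RLC BOs (the clear state block and, when allocated, the
 * CP jump table) with the usual reserve/unpin/unreserve/unref sequence.
 */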
1228 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1229 {
1230         int r;
1231
1232         /* clear state block */
1233         if (adev->gfx.rlc.clear_state_obj) {
1234                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1235                 if (unlikely(r != 0))
1236                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1237                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1238                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1239
1240                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1241                 adev->gfx.rlc.clear_state_obj = NULL;
1242         }
1243
1244         /* jump table block */
1245         if (adev->gfx.rlc.cp_table_obj) {
1246                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1247                 if (unlikely(r != 0))
1248                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1249                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1250                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1251
1252                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253                 adev->gfx.rlc.cp_table_obj = NULL;
1254         }
1255 }
1256
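/*
 * gfx_v8_0_rlc_init() allocates the clear state BO in VRAM (sized via
 * gfx_v8_0_get_csb_size()), pins and maps it, and writes the CSB with
 * gfx_v8_0_get_csb_buffer().  On Carrizo and Stoney it additionally
 * allocates the CP table BO, sized for the five 96-dword jump tables
 * (aligned to 2KB) plus 64KB for GDS, and fills it through
 * cz_init_cp_jump_table().
 */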
1257 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258 {
1259         volatile u32 *dst_ptr;
1260         u32 dws;
1261         const struct cs_section_def *cs_data;
1262         int r;
1263
1264         adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266         cs_data = adev->gfx.rlc.cs_data;
1267
1268         if (cs_data) {
1269                 /* clear state block */
1270                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1273                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274                                              AMDGPU_GEM_DOMAIN_VRAM,
1275                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1276                                              NULL, NULL,
1277                                              &adev->gfx.rlc.clear_state_obj);
1278                         if (r) {
1279                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1280                                 gfx_v8_0_rlc_fini(adev);
1281                                 return r;
1282                         }
1283                 }
1284                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1285                 if (unlikely(r != 0)) {
1286                         gfx_v8_0_rlc_fini(adev);
1287                         return r;
1288                 }
1289                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1290                                   &adev->gfx.rlc.clear_state_gpu_addr);
1291                 if (r) {
1292                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1293                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1294                         gfx_v8_0_rlc_fini(adev);
1295                         return r;
1296                 }
1297
1298                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1299                 if (r) {
1300                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1301                         gfx_v8_0_rlc_fini(adev);
1302                         return r;
1303                 }
1304                 /* set up the cs buffer */
1305                 dst_ptr = adev->gfx.rlc.cs_ptr;
1306                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1307                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1308                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1309         }
1310
1311         if ((adev->asic_type == CHIP_CARRIZO) ||
1312             (adev->asic_type == CHIP_STONEY)) {
1313                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1314                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1315                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1316                                              AMDGPU_GEM_DOMAIN_VRAM,
1317                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1318                                              NULL, NULL,
1319                                              &adev->gfx.rlc.cp_table_obj);
1320                         if (r) {
1321                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1322                                 return r;
1323                         }
1324                 }
1325
1326                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1327                 if (unlikely(r != 0)) {
1328                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1329                         return r;
1330                 }
1331                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1332                                   &adev->gfx.rlc.cp_table_gpu_addr);
1333                 if (r) {
1334                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1335                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1336                         return r;
1337                 }
1338                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1339                 if (r) {
1340                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1341                         return r;
1342                 }
1343
1344                 cz_init_cp_jump_table(adev);
1345
1346                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1347                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1348
1349         }
1350
1351         return 0;
1352 }
1353
1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1355 {
1356         int r;
1357
1358         if (adev->gfx.mec.hpd_eop_obj) {
1359                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1360                 if (unlikely(r != 0))
1361                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1362                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1363                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1364
1365                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366                 adev->gfx.mec.hpd_eop_obj = NULL;
1367         }
1368 }
1369
1370 #define MEC_HPD_SIZE 2048
1371
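/*
 * gfx_v8_0_mec_init() sets up the MEC (compute) bookkeeping and the HPD
 * EOP buffer.  Only one MEC with a single pipe (8 queues) is managed by
 * the driver; the remaining pipes are left to the KFD.  The EOP buffer
 * is allocated in GTT at MEC_HPD_SIZE * 2 bytes per pipe, pinned,
 * mapped and zeroed.
 */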
1372 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1373 {
1374         int r;
1375         u32 *hpd;
1376
1377         /*
1378          * we assign only 1 pipe because all other pipes will
1379          * be handled by KFD
1380          */
1381         adev->gfx.mec.num_mec = 1;
1382         adev->gfx.mec.num_pipe = 1;
1383         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1384
1385         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1386                 r = amdgpu_bo_create(adev,
1387                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1388                                      PAGE_SIZE, true,
1389                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1390                                      &adev->gfx.mec.hpd_eop_obj);
1391                 if (r) {
1392                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1393                         return r;
1394                 }
1395         }
1396
1397         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1398         if (unlikely(r != 0)) {
1399                 gfx_v8_0_mec_fini(adev);
1400                 return r;
1401         }
1402         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1403                           &adev->gfx.mec.hpd_eop_gpu_addr);
1404         if (r) {
1405                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1406                 gfx_v8_0_mec_fini(adev);
1407                 return r;
1408         }
1409         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1410         if (r) {
1411                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1412                 gfx_v8_0_mec_fini(adev);
1413                 return r;
1414         }
1415
1416         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1417
1418         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1419         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1420
1421         return 0;
1422 }
1423
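/*
 * The tables below are used only by gfx_v8_0_do_edc_gpr_workarounds():
 * vgpr_init_compute_shader and sgpr_init_compute_shader are the raw
 * shader dwords that initialize the VGPRs/SGPRs, the *_init_regs arrays
 * are register/value pairs programmed via SET_SH_REG before each
 * dispatch, and sec_ded_counter_registers lists the EDC counters that
 * are read back afterwards to clear them.
 */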
1424 static const u32 vgpr_init_compute_shader[] =
1425 {
1426         0x7e000209, 0x7e020208,
1427         0x7e040207, 0x7e060206,
1428         0x7e080205, 0x7e0a0204,
1429         0x7e0c0203, 0x7e0e0202,
1430         0x7e100201, 0x7e120200,
1431         0x7e140209, 0x7e160208,
1432         0x7e180207, 0x7e1a0206,
1433         0x7e1c0205, 0x7e1e0204,
1434         0x7e200203, 0x7e220202,
1435         0x7e240201, 0x7e260200,
1436         0x7e280209, 0x7e2a0208,
1437         0x7e2c0207, 0x7e2e0206,
1438         0x7e300205, 0x7e320204,
1439         0x7e340203, 0x7e360202,
1440         0x7e380201, 0x7e3a0200,
1441         0x7e3c0209, 0x7e3e0208,
1442         0x7e400207, 0x7e420206,
1443         0x7e440205, 0x7e460204,
1444         0x7e480203, 0x7e4a0202,
1445         0x7e4c0201, 0x7e4e0200,
1446         0x7e500209, 0x7e520208,
1447         0x7e540207, 0x7e560206,
1448         0x7e580205, 0x7e5a0204,
1449         0x7e5c0203, 0x7e5e0202,
1450         0x7e600201, 0x7e620200,
1451         0x7e640209, 0x7e660208,
1452         0x7e680207, 0x7e6a0206,
1453         0x7e6c0205, 0x7e6e0204,
1454         0x7e700203, 0x7e720202,
1455         0x7e740201, 0x7e760200,
1456         0x7e780209, 0x7e7a0208,
1457         0x7e7c0207, 0x7e7e0206,
1458         0xbf8a0000, 0xbf810000,
1459 };
1460
1461 static const u32 sgpr_init_compute_shader[] =
1462 {
1463         0xbe8a0100, 0xbe8c0102,
1464         0xbe8e0104, 0xbe900106,
1465         0xbe920108, 0xbe940100,
1466         0xbe960102, 0xbe980104,
1467         0xbe9a0106, 0xbe9c0108,
1468         0xbe9e0100, 0xbea00102,
1469         0xbea20104, 0xbea40106,
1470         0xbea60108, 0xbea80100,
1471         0xbeaa0102, 0xbeac0104,
1472         0xbeae0106, 0xbeb00108,
1473         0xbeb20100, 0xbeb40102,
1474         0xbeb60104, 0xbeb80106,
1475         0xbeba0108, 0xbebc0100,
1476         0xbebe0102, 0xbec00104,
1477         0xbec20106, 0xbec40108,
1478         0xbec60100, 0xbec80102,
1479         0xbee60004, 0xbee70005,
1480         0xbeea0006, 0xbeeb0007,
1481         0xbee80008, 0xbee90009,
1482         0xbefc0000, 0xbf8a0000,
1483         0xbf810000, 0x00000000,
1484 };
1485
1486 static const u32 vgpr_init_regs[] =
1487 {
1488         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1489         mmCOMPUTE_RESOURCE_LIMITS, 0,
1490         mmCOMPUTE_NUM_THREAD_X, 256*4,
1491         mmCOMPUTE_NUM_THREAD_Y, 1,
1492         mmCOMPUTE_NUM_THREAD_Z, 1,
1493         mmCOMPUTE_PGM_RSRC2, 20,
1494         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1495         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1496         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1497         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1498         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1499         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1500         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1501         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1502         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1503         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1504 };
1505
1506 static const u32 sgpr1_init_regs[] =
1507 {
1508         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1509         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1510         mmCOMPUTE_NUM_THREAD_X, 256*5,
1511         mmCOMPUTE_NUM_THREAD_Y, 1,
1512         mmCOMPUTE_NUM_THREAD_Z, 1,
1513         mmCOMPUTE_PGM_RSRC2, 20,
1514         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1515         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1516         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1517         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1518         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1519         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1520         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1521         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1522         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1523         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1524 };
1525
1526 static const u32 sgpr2_init_regs[] =
1527 {
1528         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1529         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1530         mmCOMPUTE_NUM_THREAD_X, 256*5,
1531         mmCOMPUTE_NUM_THREAD_Y, 1,
1532         mmCOMPUTE_NUM_THREAD_Z, 1,
1533         mmCOMPUTE_PGM_RSRC2, 20,
1534         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1535         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1536         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1537         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1538         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1539         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1540         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1541         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1542         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1543         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1544 };
1545
1546 static const u32 sec_ded_counter_registers[] =
1547 {
1548         mmCPC_EDC_ATC_CNT,
1549         mmCPC_EDC_SCRATCH_CNT,
1550         mmCPC_EDC_UCODE_CNT,
1551         mmCPF_EDC_ATC_CNT,
1552         mmCPF_EDC_ROQ_CNT,
1553         mmCPF_EDC_TAG_CNT,
1554         mmCPG_EDC_ATC_CNT,
1555         mmCPG_EDC_DMA_CNT,
1556         mmCPG_EDC_TAG_CNT,
1557         mmDC_EDC_CSINVOC_CNT,
1558         mmDC_EDC_RESTORE_CNT,
1559         mmDC_EDC_STATE_CNT,
1560         mmGDS_EDC_CNT,
1561         mmGDS_EDC_GRBM_CNT,
1562         mmGDS_EDC_OA_DED,
1563         mmSPI_EDC_CNT,
1564         mmSQC_ATC_EDC_GATCL1_CNT,
1565         mmSQC_EDC_CNT,
1566         mmSQ_EDC_DED_CNT,
1567         mmSQ_EDC_INFO,
1568         mmSQ_EDC_SEC_CNT,
1569         mmTCC_EDC_CNT,
1570         mmTCP_ATC_EDC_GATCL1_CNT,
1571         mmTCP_EDC_CNT,
1572         mmTD_EDC_CNT
1573 };
1574
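/*
 * Carrizo EDC GPR workaround: build one indirect buffer containing
 * three compute dispatches (a VGPR-init pass and two SGPR-init passes
 * with different COMPUTE_STATIC_THREAD_MGMT_SE0 masks), submit it on
 * the first compute ring and wait for completion, then re-enable EDC
 * (GB_EDC_MODE DED_MODE/PROP_FED, CC_GC_EDC_CONFIG) and read back all
 * SEC/DED counter registers to reset them.
 */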
1575 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1576 {
1577         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1578         struct amdgpu_ib ib;
1579         struct fence *f = NULL;
1580         int r, i;
1581         u32 tmp;
1582         unsigned total_size, vgpr_offset, sgpr_offset;
1583         u64 gpu_addr;
1584
1585         /* only supported on CZ */
1586         if (adev->asic_type != CHIP_CARRIZO)
1587                 return 0;
1588
1589         /* bail if the compute ring is not ready */
1590         if (!ring->ready)
1591                 return 0;
1592
1593         tmp = RREG32(mmGB_EDC_MODE);
1594         WREG32(mmGB_EDC_MODE, 0);
1595
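        /*
         * IB sizing: each register/value pair becomes a 3-dword
         * SET_SH_REG packet; the extra 4 + 5 + 2 dwords per dispatch
         * cover the COMPUTE_PGM_LO/HI write, the DISPATCH_DIRECT packet
         * and the CS partial flush, with the dword total converted to
         * bytes (* 4).  The shader binaries are appended after the
         * packet stream at 256-byte-aligned offsets.
         */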
1596         total_size =
1597                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598         total_size +=
1599                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600         total_size +=
1601                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1602         total_size = ALIGN(total_size, 256);
1603         vgpr_offset = total_size;
1604         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1605         sgpr_offset = total_size;
1606         total_size += sizeof(sgpr_init_compute_shader);
1607
1608         /* allocate an indirect buffer to put the commands in */
1609         memset(&ib, 0, sizeof(ib));
1610         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1611         if (r) {
1612                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1613                 return r;
1614         }
1615
1616         /* load the compute shaders */
1617         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1618                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1619
1620         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1621                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1622
1623         /* init the ib length to 0 */
1624         ib.length_dw = 0;
1625
1626         /* VGPR */
1627         /* write the register state for the compute dispatch */
1628         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1629                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1630                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1631                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1632         }
1633         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1634         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1635         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1636         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1637         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1638         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639
1640         /* write dispatch packet */
1641         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1642         ib.ptr[ib.length_dw++] = 8; /* x */
1643         ib.ptr[ib.length_dw++] = 1; /* y */
1644         ib.ptr[ib.length_dw++] = 1; /* z */
1645         ib.ptr[ib.length_dw++] =
1646                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647
1648         /* write CS partial flush packet */
1649         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1650         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1651
1652         /* SGPR1 */
1653         /* write the register state for the compute dispatch */
1654         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1655                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1656                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1657                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1658         }
1659         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1660         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1661         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1662         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1663         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1664         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665
1666         /* write dispatch packet */
1667         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1668         ib.ptr[ib.length_dw++] = 8; /* x */
1669         ib.ptr[ib.length_dw++] = 1; /* y */
1670         ib.ptr[ib.length_dw++] = 1; /* z */
1671         ib.ptr[ib.length_dw++] =
1672                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673
1674         /* write CS partial flush packet */
1675         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1676         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677
1678         /* SGPR2 */
1679         /* write the register state for the compute dispatch */
1680         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1681                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1682                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1683                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1684         }
1685         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1686         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1687         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1688         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1689         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1690         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1691
1692         /* write dispatch packet */
1693         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1694         ib.ptr[ib.length_dw++] = 8; /* x */
1695         ib.ptr[ib.length_dw++] = 1; /* y */
1696         ib.ptr[ib.length_dw++] = 1; /* z */
1697         ib.ptr[ib.length_dw++] =
1698                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1699
1700         /* write CS partial flush packet */
1701         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1702         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1703
1704         /* schedule the IB on the ring */
1705         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1706         if (r) {
1707                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1708                 goto fail;
1709         }
1710
1711         /* wait for the GPU to finish processing the IB */
1712         r = fence_wait(f, false);
1713         if (r) {
1714                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1715                 goto fail;
1716         }
1717
1718         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1719         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1720         WREG32(mmGB_EDC_MODE, tmp);
1721
1722         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1723         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1724         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1725
1726
1727         /* read back registers to clear the counters */
1728         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1729                 RREG32(sec_ded_counter_registers[i]);
1730
1731 fail:
1732         amdgpu_ib_free(adev, &ib, NULL);
1733         fence_put(f);
1734
1735         return r;
1736 }
1737
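/*
 * gfx_v8_0_gpu_early_init() fills adev->gfx.config with the per-ASIC
 * shader engine/CU/backend/cache limits (queried from atombios on
 * Polaris), selects the golden GB_ADDR_CONFIG value, derives the memory
 * row size (from the fused DIMM address maps on APUs, from
 * MC_ARB_RAMCFG.NOOFCOLS on dGPUs) and folds it back into the ROW_SIZE
 * field of gb_addr_config.
 */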
1738 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1739 {
1740         u32 gb_addr_config;
1741         u32 mc_shared_chmap, mc_arb_ramcfg;
1742         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1743         u32 tmp;
1744         int ret;
1745
1746         switch (adev->asic_type) {
1747         case CHIP_TOPAZ:
1748                 adev->gfx.config.max_shader_engines = 1;
1749                 adev->gfx.config.max_tile_pipes = 2;
1750                 adev->gfx.config.max_cu_per_sh = 6;
1751                 adev->gfx.config.max_sh_per_se = 1;
1752                 adev->gfx.config.max_backends_per_se = 2;
1753                 adev->gfx.config.max_texture_channel_caches = 2;
1754                 adev->gfx.config.max_gprs = 256;
1755                 adev->gfx.config.max_gs_threads = 32;
1756                 adev->gfx.config.max_hw_contexts = 8;
1757
1758                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1759                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1760                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1761                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1762                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1763                 break;
1764         case CHIP_FIJI:
1765                 adev->gfx.config.max_shader_engines = 4;
1766                 adev->gfx.config.max_tile_pipes = 16;
1767                 adev->gfx.config.max_cu_per_sh = 16;
1768                 adev->gfx.config.max_sh_per_se = 1;
1769                 adev->gfx.config.max_backends_per_se = 4;
1770                 adev->gfx.config.max_texture_channel_caches = 16;
1771                 adev->gfx.config.max_gprs = 256;
1772                 adev->gfx.config.max_gs_threads = 32;
1773                 adev->gfx.config.max_hw_contexts = 8;
1774
1775                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1776                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1777                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1778                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1779                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1780                 break;
1781         case CHIP_POLARIS11:
1782                 ret = amdgpu_atombios_get_gfx_info(adev);
1783                 if (ret)
1784                         return ret;
1785                 adev->gfx.config.max_gprs = 256;
1786                 adev->gfx.config.max_gs_threads = 32;
1787                 adev->gfx.config.max_hw_contexts = 8;
1788
1789                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1790                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1791                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1792                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1793                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1794                 break;
1795         case CHIP_POLARIS10:
1796                 ret = amdgpu_atombios_get_gfx_info(adev);
1797                 if (ret)
1798                         return ret;
1799                 adev->gfx.config.max_gprs = 256;
1800                 adev->gfx.config.max_gs_threads = 32;
1801                 adev->gfx.config.max_hw_contexts = 8;
1802
1803                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1804                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1805                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1806                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1807                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1808                 break;
1809         case CHIP_TONGA:
1810                 adev->gfx.config.max_shader_engines = 4;
1811                 adev->gfx.config.max_tile_pipes = 8;
1812                 adev->gfx.config.max_cu_per_sh = 8;
1813                 adev->gfx.config.max_sh_per_se = 1;
1814                 adev->gfx.config.max_backends_per_se = 2;
1815                 adev->gfx.config.max_texture_channel_caches = 8;
1816                 adev->gfx.config.max_gprs = 256;
1817                 adev->gfx.config.max_gs_threads = 32;
1818                 adev->gfx.config.max_hw_contexts = 8;
1819
1820                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1821                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1822                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1823                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1824                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1825                 break;
1826         case CHIP_CARRIZO:
1827                 adev->gfx.config.max_shader_engines = 1;
1828                 adev->gfx.config.max_tile_pipes = 2;
1829                 adev->gfx.config.max_sh_per_se = 1;
1830                 adev->gfx.config.max_backends_per_se = 2;
1831
1832                 switch (adev->pdev->revision) {
1833                 case 0xc4:
1834                 case 0x84:
1835                 case 0xc8:
1836                 case 0xcc:
1837                 case 0xe1:
1838                 case 0xe3:
1839                         /* B10 */
1840                         adev->gfx.config.max_cu_per_sh = 8;
1841                         break;
1842                 case 0xc5:
1843                 case 0x81:
1844                 case 0x85:
1845                 case 0xc9:
1846                 case 0xcd:
1847                 case 0xe2:
1848                 case 0xe4:
1849                         /* B8 */
1850                         adev->gfx.config.max_cu_per_sh = 6;
1851                         break;
1852                 case 0xc6:
1853                 case 0xca:
1854                 case 0xce:
1855                 case 0x88:
1856                         /* B6 */
1857                         adev->gfx.config.max_cu_per_sh = 6;
1858                         break;
1859                 case 0xc7:
1860                 case 0x87:
1861                 case 0xcb:
1862                 case 0xe5:
1863                 case 0x89:
1864                 default:
1865                         /* B4 */
1866                         adev->gfx.config.max_cu_per_sh = 4;
1867                         break;
1868                 }
1869
1870                 adev->gfx.config.max_texture_channel_caches = 2;
1871                 adev->gfx.config.max_gprs = 256;
1872                 adev->gfx.config.max_gs_threads = 32;
1873                 adev->gfx.config.max_hw_contexts = 8;
1874
1875                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1879                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1880                 break;
1881         case CHIP_STONEY:
1882                 adev->gfx.config.max_shader_engines = 1;
1883                 adev->gfx.config.max_tile_pipes = 2;
1884                 adev->gfx.config.max_sh_per_se = 1;
1885                 adev->gfx.config.max_backends_per_se = 1;
1886
1887                 switch (adev->pdev->revision) {
1888                 case 0xc0:
1889                 case 0xc1:
1890                 case 0xc2:
1891                 case 0xc4:
1892                 case 0xc8:
1893                 case 0xc9:
1894                         adev->gfx.config.max_cu_per_sh = 3;
1895                         break;
1896                 case 0xd0:
1897                 case 0xd1:
1898                 case 0xd2:
1899                 default:
1900                         adev->gfx.config.max_cu_per_sh = 2;
1901                         break;
1902                 }
1903
1904                 adev->gfx.config.max_texture_channel_caches = 2;
1905                 adev->gfx.config.max_gprs = 256;
1906                 adev->gfx.config.max_gs_threads = 16;
1907                 adev->gfx.config.max_hw_contexts = 8;
1908
1909                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1910                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1911                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1912                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1913                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1914                 break;
1915         default:
1916                 adev->gfx.config.max_shader_engines = 2;
1917                 adev->gfx.config.max_tile_pipes = 4;
1918                 adev->gfx.config.max_cu_per_sh = 2;
1919                 adev->gfx.config.max_sh_per_se = 1;
1920                 adev->gfx.config.max_backends_per_se = 2;
1921                 adev->gfx.config.max_texture_channel_caches = 4;
1922                 adev->gfx.config.max_gprs = 256;
1923                 adev->gfx.config.max_gs_threads = 32;
1924                 adev->gfx.config.max_hw_contexts = 8;
1925
1926                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1927                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1928                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1929                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1930                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1931                 break;
1932         }
1933
1934         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1935         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1936         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1937
1938         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1939         adev->gfx.config.mem_max_burst_length_bytes = 256;
1940         if (adev->flags & AMD_IS_APU) {
1941                 /* Get memory bank mapping mode. */
1942                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1943                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1944                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1945
1946                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1947                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1948                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1949
1950                 /* Validate settings in case only one DIMM is installed. */
1951                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1952                         dimm00_addr_map = 0;
1953                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1954                         dimm01_addr_map = 0;
1955                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1956                         dimm10_addr_map = 0;
1957                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1958                         dimm11_addr_map = 0;
1959
1960                 /* If the DIMM addr map is 8GB, the ROW size should be 2KB, otherwise 1KB. */
1961                 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
1962                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1963                         adev->gfx.config.mem_row_size_in_kb = 2;
1964                 else
1965                         adev->gfx.config.mem_row_size_in_kb = 1;
1966         } else {
1967                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1968                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1969                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1970                         adev->gfx.config.mem_row_size_in_kb = 4;
1971         }
1972
1973         adev->gfx.config.shader_engine_tile_size = 32;
1974         adev->gfx.config.num_gpus = 1;
1975         adev->gfx.config.multi_gpu_tile_size = 64;
1976
1977         /* fix up row size */
1978         switch (adev->gfx.config.mem_row_size_in_kb) {
1979         case 1:
1980         default:
1981                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1982                 break;
1983         case 2:
1984                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1985                 break;
1986         case 4:
1987                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1988                 break;
1989         }
1990         adev->gfx.config.gb_addr_config = gb_addr_config;
1991
1992         return 0;
1993 }
1994
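/*
 * sw_init: hook up the EOP (181), privileged register (184) and
 * privileged instruction (185) interrupt sources, load the gfx
 * microcode, create the RLC and MEC BOs, initialize the gfx ring
 * (doorbell-less on Topaz) and the compute rings, reserve the
 * GDS/GWS/OA partitions and finish with the per-ASIC gfx configuration.
 */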
1995 static int gfx_v8_0_sw_init(void *handle)
1996 {
1997         int i, r;
1998         struct amdgpu_ring *ring;
1999         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2000
2001         /* EOP Event */
2002         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2003         if (r)
2004                 return r;
2005
2006         /* Privileged reg */
2007         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2008         if (r)
2009                 return r;
2010
2011         /* Privileged inst */
2012         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2013         if (r)
2014                 return r;
2015
2016         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2017
2018         gfx_v8_0_scratch_init(adev);
2019
2020         r = gfx_v8_0_init_microcode(adev);
2021         if (r) {
2022                 DRM_ERROR("Failed to load gfx firmware!\n");
2023                 return r;
2024         }
2025
2026         r = gfx_v8_0_rlc_init(adev);
2027         if (r) {
2028                 DRM_ERROR("Failed to init rlc BOs!\n");
2029                 return r;
2030         }
2031
2032         r = gfx_v8_0_mec_init(adev);
2033         if (r) {
2034                 DRM_ERROR("Failed to init MEC BOs!\n");
2035                 return r;
2036         }
2037
2038         /* set up the gfx ring */
2039         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2040                 ring = &adev->gfx.gfx_ring[i];
2041                 ring->ring_obj = NULL;
2042                 sprintf(ring->name, "gfx");
2043                 /* no gfx doorbells on iceland */
2044                 if (adev->asic_type != CHIP_TOPAZ) {
2045                         ring->use_doorbell = true;
2046                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2047                 }
2048
2049                 r = amdgpu_ring_init(adev, ring, 1024,
2050                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2051                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2052                                      AMDGPU_RING_TYPE_GFX);
2053                 if (r)
2054                         return r;
2055         }
2056
2057         /* set up the compute queues */
2058         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2059                 unsigned irq_type;
2060
2061                 /* max 32 queues per MEC */
2062                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2063                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2064                         break;
2065                 }
2066                 ring = &adev->gfx.compute_ring[i];
2067                 ring->ring_obj = NULL;
2068                 ring->use_doorbell = true;
2069                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2070                 ring->me = 1; /* first MEC */
2071                 ring->pipe = i / 8;
2072                 ring->queue = i % 8;
2073                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2074                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2075                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2076                 r = amdgpu_ring_init(adev, ring, 1024,
2077                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2078                                      &adev->gfx.eop_irq, irq_type,
2079                                      AMDGPU_RING_TYPE_COMPUTE);
2080                 if (r)
2081                         return r;
2082         }
2083
2084         /* reserve GDS, GWS and OA resource for gfx */
2085         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2086                         PAGE_SIZE, true,
2087                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2088                         NULL, &adev->gds.gds_gfx_bo);
2089         if (r)
2090                 return r;
2091
2092         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2093                 PAGE_SIZE, true,
2094                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2095                 NULL, &adev->gds.gws_gfx_bo);
2096         if (r)
2097                 return r;
2098
2099         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2100                         PAGE_SIZE, true,
2101                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2102                         NULL, &adev->gds.oa_gfx_bo);
2103         if (r)
2104                 return r;
2105
2106         adev->gfx.ce_ram_size = 0x8000;
2107
2108         r = gfx_v8_0_gpu_early_init(adev);
2109         if (r)
2110                 return r;
2111
2112         return 0;
2113 }
2114
2115 static int gfx_v8_0_sw_fini(void *handle)
2116 {
2117         int i;
2118         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2119
2120         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2121         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2122         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2123
2124         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2125                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2126         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2127                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2128
2129         gfx_v8_0_mec_fini(adev);
2130
2131         gfx_v8_0_rlc_fini(adev);
2132
2133         gfx_v8_0_free_microcode(adev);
2134
2135         return 0;
2136 }
2137
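/*
 * gfx_v8_0_tiling_mode_table_init() builds the cached tile mode and
 * macrotile mode arrays from the ARRAY_MODE/PIPE_CONFIG/TILE_SPLIT/...
 * helpers for the current ASIC and programs them into the
 * GB_TILE_MODE* and GB_MACROTILE_MODE* registers, skipping the
 * reserved entries.
 */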
2138 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2139 {
2140         uint32_t *modearray, *mod2array;
2141         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2142         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2143         u32 reg_offset;
2144
2145         modearray = adev->gfx.config.tile_mode_array;
2146         mod2array = adev->gfx.config.macrotile_mode_array;
2147
2148         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2149                 modearray[reg_offset] = 0;
2150
2151         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2152                 mod2array[reg_offset] = 0;
2153
2154         switch (adev->asic_type) {
2155         case CHIP_TOPAZ:
2156                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2) |
2178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2) |
2182                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2183                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2184                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2185                                 PIPE_CONFIG(ADDR_SURF_P2));
2186                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2187                                 PIPE_CONFIG(ADDR_SURF_P2) |
2188                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2194                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2198                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2210                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2214                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2239                                  PIPE_CONFIG(ADDR_SURF_P2) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2243                                  PIPE_CONFIG(ADDR_SURF_P2) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2246                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P2) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2250                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2251                                  PIPE_CONFIG(ADDR_SURF_P2) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2254                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2255                                  PIPE_CONFIG(ADDR_SURF_P2) |
2256                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2257                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2258
2259                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2270                                 NUM_BANKS(ADDR_SURF_8_BANK));
2271                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274                                 NUM_BANKS(ADDR_SURF_8_BANK));
2275                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278                                 NUM_BANKS(ADDR_SURF_8_BANK));
2279                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282                                 NUM_BANKS(ADDR_SURF_8_BANK));
2283                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2286                                 NUM_BANKS(ADDR_SURF_8_BANK));
2287                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                 NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2292                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                 NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2296                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2297                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                  NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2300                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2301                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302                                  NUM_BANKS(ADDR_SURF_16_BANK));
2303                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306                                  NUM_BANKS(ADDR_SURF_16_BANK));
2307                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2310                                  NUM_BANKS(ADDR_SURF_16_BANK));
2311                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2312                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2313                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2314                                  NUM_BANKS(ADDR_SURF_8_BANK));
2315
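                     /*
                      * Entries 7, 12, 17 and 23 of the tile-mode table (and entry 7 of
                      * the macro-tile table) are skipped below, so those registers are
                      * left at their existing values; they do not appear to be used on
                      * this ASIC.
                      */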
2316                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2317                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2318                             reg_offset != 23)
2319                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2320
2321                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2322                         if (reg_offset != 7)
2323                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2324
2325                 break;
2326         case CHIP_FIJI:
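                     /*
                      * Fiji uses the 16-pipe ADDR_SURF_P16_32x32_16x16 pipe configuration.
                      * modearray[] seeds GB_TILE_MODE0..30 (array mode, pipe config, tile
                      * split, micro tile mode, sample split) and mod2array[] seeds
                      * GB_MACROTILE_MODE0..14 (bank width/height, macro tile aspect,
                      * number of banks); both are flushed to the registers by the loops
                      * at the end of this case.
                      */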
2327                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2348                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2355                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2356                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2357                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2358                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2359                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2360                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2361                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2362                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2369                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2377                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2394                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2397                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2429                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2433                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2449
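                     /* Macro-tile (bank) parameters, written to GB_MACROTILE_MODE0..14 below. */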
2450                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                 NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2480                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                 NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2484                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2485                                 NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2493                                  NUM_BANKS(ADDR_SURF_8_BANK));
2494                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2496                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497                                  NUM_BANKS(ADDR_SURF_8_BANK));
2498                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2501                                  NUM_BANKS(ADDR_SURF_8_BANK));
2502                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2505                                  NUM_BANKS(ADDR_SURF_4_BANK));
2506
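                     /*
                      * Write out the whole tile-mode table; macro-tile entry 7 is skipped
                      * because it is never populated here.
                      */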
2507                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2508                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2509
2510                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2511                         if (reg_offset != 7)
2512                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2513
2514                 break;
2515         case CHIP_TONGA:
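                     /*
                      * Tonga: same table layout as Fiji, but with the 8-pipe
                      * ADDR_SURF_P8_32x32_16x16 pipe configuration and its own
                      * bank settings.
                      */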
2516                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2537                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2542                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2544                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2545                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2546                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2547                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2548                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2549                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2550                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2551                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2558                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2566                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2582                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2583                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2586                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2603                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2607                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2615                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2618                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2619                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2621                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2622                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2627                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2634                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2636                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2637                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2638
2639                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2658                                 NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2666                                 NUM_BANKS(ADDR_SURF_16_BANK));
2667                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2669                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                 NUM_BANKS(ADDR_SURF_16_BANK));
2671                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2673                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2674                                 NUM_BANKS(ADDR_SURF_16_BANK));
2675                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2677                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2678                                  NUM_BANKS(ADDR_SURF_16_BANK));
2679                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682                                  NUM_BANKS(ADDR_SURF_16_BANK));
2683                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686                                  NUM_BANKS(ADDR_SURF_8_BANK));
2687                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2689                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2690                                  NUM_BANKS(ADDR_SURF_4_BANK));
2691                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694                                  NUM_BANKS(ADDR_SURF_4_BANK));
2695
2696                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2697                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2698
2699                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2700                         if (reg_offset != 7)
2701                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2702
2703                 break;
2704         case CHIP_POLARIS11:
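                     /*
                      * Polaris11: every entry uses the ADDR_SURF_P4_16x16 pipe
                      * configuration (i.e. a 4-pipe part).
                      */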
2705                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2708                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2712                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2716                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2720                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2722                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2726                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2728                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2729                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2730                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2732                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2733                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2734                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2735                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2736                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2737                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2738                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2739                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2747                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2755                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2767                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2775                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2784                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2788                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2792                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2796                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2798                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2800                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2802                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2803                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2804                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2806                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2807                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2808                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2810                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2811                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2819                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2824                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2825                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2826                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2827
2828                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2830                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2831                                 NUM_BANKS(ADDR_SURF_16_BANK));
2832
2833                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2835                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2836                                 NUM_BANKS(ADDR_SURF_16_BANK));
2837
2838                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841                                 NUM_BANKS(ADDR_SURF_16_BANK));
2842
2843                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2844                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2845                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2846                                 NUM_BANKS(ADDR_SURF_16_BANK));
2847
2848                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                 NUM_BANKS(ADDR_SURF_16_BANK));
2852
2853                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2856                                 NUM_BANKS(ADDR_SURF_16_BANK));
2857
2858                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2859                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2860                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2861                                 NUM_BANKS(ADDR_SURF_16_BANK));
2862
2863                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2864                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2865                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2866                                 NUM_BANKS(ADDR_SURF_16_BANK));
2867
2868                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2869                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                 NUM_BANKS(ADDR_SURF_16_BANK));
2872
2873                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2874                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2875                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2876                                 NUM_BANKS(ADDR_SURF_16_BANK));
2877
2878                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2880                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2881                                 NUM_BANKS(ADDR_SURF_16_BANK));
2882
2883                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2885                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2886                                 NUM_BANKS(ADDR_SURF_16_BANK));
2887
2888                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2889                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2890                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2891                                 NUM_BANKS(ADDR_SURF_8_BANK));
2892
2893                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2894                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2895                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2896                                 NUM_BANKS(ADDR_SURF_4_BANK));
2897
2898                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2899                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2900
2901                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2902                         if (reg_offset != 7)
2903                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2904
2905                 break;
2906         case CHIP_POLARIS10:
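                     /*
                      * Polaris10: 8-pipe ADDR_SURF_P8_32x32_16x16 pipe configuration,
                      * much like Tonga, with its own macro-tile (bank) settings.
                      */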
2907                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2910                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2914                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2918                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2922                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2924                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2928                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2933                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2936                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2937                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2938                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2939                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2941                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2949                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2957                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2969                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2974                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2977                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2982                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2986                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2990                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2995                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2998                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2999                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3000                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3002                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3004                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3006                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3010                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3012                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3013                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3021                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3023                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3026                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3027                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3028                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3029
3030                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3031                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3032                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3033                                 NUM_BANKS(ADDR_SURF_16_BANK));
3034
3035                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3036                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3037                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3038                                 NUM_BANKS(ADDR_SURF_16_BANK));
3039
3040                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3042                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3043                                 NUM_BANKS(ADDR_SURF_16_BANK));
3044
3045                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3046                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3047                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3048                                 NUM_BANKS(ADDR_SURF_16_BANK));
3049
3050                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3051                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3052                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3053                                 NUM_BANKS(ADDR_SURF_16_BANK));
3054
3055                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3056                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3057                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3058                                 NUM_BANKS(ADDR_SURF_16_BANK));
3059
3060                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3061                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3062                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3063                                 NUM_BANKS(ADDR_SURF_16_BANK));
3064
3065                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3066                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3067                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3068                                 NUM_BANKS(ADDR_SURF_16_BANK));
3069
3070                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3071                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073                                 NUM_BANKS(ADDR_SURF_16_BANK));
3074
3075                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3076                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3077                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3078                                 NUM_BANKS(ADDR_SURF_16_BANK));
3079
3080                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3081                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3082                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3083                                 NUM_BANKS(ADDR_SURF_16_BANK));
3084
3085                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3088                                 NUM_BANKS(ADDR_SURF_8_BANK));
3089
3090                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3091                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3092                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3093                                 NUM_BANKS(ADDR_SURF_4_BANK));
3094
3095                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3096                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3097                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3098                                 NUM_BANKS(ADDR_SURF_4_BANK));
3099
3100                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3101                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3102
3103                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3104                         if (reg_offset != 7)
3105                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3106
3107                 break;
3108         case CHIP_STONEY:
3109                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110                                 PIPE_CONFIG(ADDR_SURF_P2) |
3111                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3112                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114                                 PIPE_CONFIG(ADDR_SURF_P2) |
3115                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3116                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118                                 PIPE_CONFIG(ADDR_SURF_P2) |
3119                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3120                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122                                 PIPE_CONFIG(ADDR_SURF_P2) |
3123                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3124                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3126                                 PIPE_CONFIG(ADDR_SURF_P2) |
3127                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3130                                 PIPE_CONFIG(ADDR_SURF_P2) |
3131                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3134                                 PIPE_CONFIG(ADDR_SURF_P2) |
3135                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3136                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3137                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3138                                 PIPE_CONFIG(ADDR_SURF_P2));
3139                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3140                                 PIPE_CONFIG(ADDR_SURF_P2) |
3141                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3147                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3151                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168                                  PIPE_CONFIG(ADDR_SURF_P2) |
3169                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3170                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3172                                  PIPE_CONFIG(ADDR_SURF_P2) |
3173                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3176                                  PIPE_CONFIG(ADDR_SURF_P2) |
3177                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3180                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3184                                  PIPE_CONFIG(ADDR_SURF_P2) |
3185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3188                                  PIPE_CONFIG(ADDR_SURF_P2) |
3189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3192                                  PIPE_CONFIG(ADDR_SURF_P2) |
3193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3196                                  PIPE_CONFIG(ADDR_SURF_P2) |
3197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3199                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200                                  PIPE_CONFIG(ADDR_SURF_P2) |
3201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211
3212                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3214                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                 NUM_BANKS(ADDR_SURF_8_BANK));
3216                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3218                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                 NUM_BANKS(ADDR_SURF_8_BANK));
3220                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                 NUM_BANKS(ADDR_SURF_8_BANK));
3224                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227                                 NUM_BANKS(ADDR_SURF_8_BANK));
3228                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231                                 NUM_BANKS(ADDR_SURF_8_BANK));
3232                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235                                 NUM_BANKS(ADDR_SURF_8_BANK));
3236                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3237                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3238                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3239                                 NUM_BANKS(ADDR_SURF_8_BANK));
3240                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3242                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243                                 NUM_BANKS(ADDR_SURF_16_BANK));
3244                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3245                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247                                 NUM_BANKS(ADDR_SURF_16_BANK));
3248                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3250                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251                                  NUM_BANKS(ADDR_SURF_16_BANK));
3252                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3253                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255                                  NUM_BANKS(ADDR_SURF_16_BANK));
3256                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3258                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259                                  NUM_BANKS(ADDR_SURF_16_BANK));
3260                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3263                                  NUM_BANKS(ADDR_SURF_16_BANK));
3264                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3265                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3266                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3267                                  NUM_BANKS(ADDR_SURF_8_BANK));
3268
3269                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3270                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3271                             reg_offset != 23)
3272                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3273
3274                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3275                         if (reg_offset != 7)
3276                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3277
3278                 break;
3279         default:
3280                 dev_warn(adev->dev,
3281                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3282                          adev->asic_type);
3283
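                /* fall through */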
3284         case CHIP_CARRIZO:
3285                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286                                 PIPE_CONFIG(ADDR_SURF_P2) |
3287                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3288                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290                                 PIPE_CONFIG(ADDR_SURF_P2) |
3291                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3292                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294                                 PIPE_CONFIG(ADDR_SURF_P2) |
3295                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3296                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298                                 PIPE_CONFIG(ADDR_SURF_P2) |
3299                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3300                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3302                                 PIPE_CONFIG(ADDR_SURF_P2) |
3303                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3306                                 PIPE_CONFIG(ADDR_SURF_P2) |
3307                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3310                                 PIPE_CONFIG(ADDR_SURF_P2) |
3311                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3312                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3313                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3314                                 PIPE_CONFIG(ADDR_SURF_P2));
3315                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3316                                 PIPE_CONFIG(ADDR_SURF_P2) |
3317                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3320                                  PIPE_CONFIG(ADDR_SURF_P2) |
3321                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3323                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3324                                  PIPE_CONFIG(ADDR_SURF_P2) |
3325                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3326                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3327                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3328                                  PIPE_CONFIG(ADDR_SURF_P2) |
3329                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3332                                  PIPE_CONFIG(ADDR_SURF_P2) |
3333                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3336                                  PIPE_CONFIG(ADDR_SURF_P2) |
3337                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3339                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3340                                  PIPE_CONFIG(ADDR_SURF_P2) |
3341                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3343                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344                                  PIPE_CONFIG(ADDR_SURF_P2) |
3345                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3346                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3348                                  PIPE_CONFIG(ADDR_SURF_P2) |
3349                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3352                                  PIPE_CONFIG(ADDR_SURF_P2) |
3353                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3356                                  PIPE_CONFIG(ADDR_SURF_P2) |
3357                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3360                                  PIPE_CONFIG(ADDR_SURF_P2) |
3361                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3362                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3364                                  PIPE_CONFIG(ADDR_SURF_P2) |
3365                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3366                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3368                                  PIPE_CONFIG(ADDR_SURF_P2) |
3369                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3372                                  PIPE_CONFIG(ADDR_SURF_P2) |
3373                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3374                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3375                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3376                                  PIPE_CONFIG(ADDR_SURF_P2) |
3377                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3380                                  PIPE_CONFIG(ADDR_SURF_P2) |
3381                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3383                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3384                                  PIPE_CONFIG(ADDR_SURF_P2) |
3385                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3386                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3387
3388                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3390                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391                                 NUM_BANKS(ADDR_SURF_8_BANK));
3392                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3394                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3395                                 NUM_BANKS(ADDR_SURF_8_BANK));
3396                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399                                 NUM_BANKS(ADDR_SURF_8_BANK));
3400                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403                                 NUM_BANKS(ADDR_SURF_8_BANK));
3404                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407                                 NUM_BANKS(ADDR_SURF_8_BANK));
3408                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411                                 NUM_BANKS(ADDR_SURF_8_BANK));
3412                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3413                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3414                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3415                                 NUM_BANKS(ADDR_SURF_8_BANK));
3416                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3418                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419                                 NUM_BANKS(ADDR_SURF_16_BANK));
3420                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3421                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423                                 NUM_BANKS(ADDR_SURF_16_BANK));
3424                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3426                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427                                  NUM_BANKS(ADDR_SURF_16_BANK));
3428                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3429                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431                                  NUM_BANKS(ADDR_SURF_16_BANK));
3432                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3434                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435                                  NUM_BANKS(ADDR_SURF_16_BANK));
3436                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3439                                  NUM_BANKS(ADDR_SURF_16_BANK));
3440                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3441                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3442                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3443                                  NUM_BANKS(ADDR_SURF_8_BANK));
3444
3445                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3446                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3447                             reg_offset != 23)
3448                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3449
3450                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3451                         if (reg_offset != 7)
3452                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3453
3454                 break;
3455         }
3456 }
3457
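/*
 * Program GRBM_GFX_INDEX so that subsequent register accesses target the
 * given shader engine / shader array / instance.  Passing 0xffffffff for
 * any argument selects broadcast writes to all of them.
 */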
3458 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3459                                   u32 se_num, u32 sh_num, u32 instance)
3460 {
3461         u32 data;
3462
3463         if (instance == 0xffffffff)
3464                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3465         else
3466                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3467
3468         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3470                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3471         } else if (se_num == 0xffffffff) {
3472                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3473                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3474         } else if (sh_num == 0xffffffff) {
3475                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3476                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3477         } else {
3478                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3479                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3480         }
3481         WREG32(mmGRBM_GFX_INDEX, data);
3482 }
3483
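/* return a mask with the lowest bit_width bits set */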
3484 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3485 {
3486         return (u32)((1ULL << bit_width) - 1);
3487 }
3488
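/*
 * Return the bitmap of render backends that are not disabled (by either
 * the CC or the GC_USER backend-disable register) for the currently
 * selected SE/SH.
 */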
3489 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3490 {
3491         u32 data, mask;
3492
3493         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3494         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3495
3496         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3497         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3498
3499         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3500                                        adev->gfx.config.max_sh_per_se);
3501
3502         return (~data) & mask;
3503 }
3504
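/*
 * Walk every shader engine / shader array, collect the active render
 * backend bitmap, and cache the resulting enable mask and RB count in
 * adev->gfx.config.
 */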
3505 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3506 {
3507         int i, j;
3508         u32 data;
3509         u32 active_rbs = 0;
3510         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3511                                         adev->gfx.config.max_sh_per_se;
3512
3513         mutex_lock(&adev->grbm_idx_mutex);
3514         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3515                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3516                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3517                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3518                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3519                                                rb_bitmap_width_per_sh);
3520                 }
3521         }
3522         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3523         mutex_unlock(&adev->grbm_idx_mutex);
3524
3525         adev->gfx.config.backend_enable_mask = active_rbs;
3526         adev->gfx.config.num_rbs = hweight32(active_rbs);
3527 }
3528
3529 /**
3530  * gfx_v8_0_init_compute_vmid - init the compute VMID SH_MEM registers
3531  *
3532  * @adev: amdgpu_device pointer
3533  *
3534  * Initialize the SH_MEM config and aperture bases for the compute VMIDs
3535  * (FIRST_COMPUTE_VMID up to, but not including, LAST_COMPUTE_VMID).
3536  */
3537 #define DEFAULT_SH_MEM_BASES    (0x6000)
3538 #define FIRST_COMPUTE_VMID      (8)
3539 #define LAST_COMPUTE_VMID       (16)
3540 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3541 {
3542         int i;
3543         uint32_t sh_mem_config;
3544         uint32_t sh_mem_bases;
3545
3546         /*
3547          * Configure apertures:
3548          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3549          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3550          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3551          */
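        /*
         * 0x6000 is the upper 16 bits of the 0x6000'0000'0000'0000 aperture
         * base above; SH_MEM_BASES packs the shared and private aperture
         * bases into its low and high 16-bit halves.
         */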
3552         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3553
3554         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3555                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3556                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3557                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3558                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3559                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3560
3561         mutex_lock(&adev->srbm_mutex);
3562         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3563                 vi_srbm_select(adev, 0, 0, 0, i);
3564                 /* CP and shaders */
3565                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3566                 WREG32(mmSH_MEM_APE1_BASE, 1);
3567                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3568                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3569         }
3570         vi_srbm_select(adev, 0, 0, 0, 0);
3571         mutex_unlock(&adev->srbm_mutex);
3572 }
3573
3574 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3575 {
3576         u32 tmp;
3577         int i;
3578
3579         tmp = RREG32(mmGRBM_CNTL);
3580         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3581         WREG32(mmGRBM_CNTL, tmp);
3582
3583         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3584         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3585         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3586
3587         gfx_v8_0_tiling_mode_table_init(adev);
3588
3589         gfx_v8_0_setup_rb(adev);
3590         gfx_v8_0_get_cu_info(adev);
3591
3592         /* XXX SH_MEM regs */
3593         /* where to put LDS, scratch, GPUVM in FSA64 space */
3594         mutex_lock(&adev->srbm_mutex);
3595         for (i = 0; i < 16; i++) {
3596                 vi_srbm_select(adev, 0, 0, 0, i);
3597                 /* CP and shaders */
3598                 if (i == 0) {
3599                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3600                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3601                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3602                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3603                         WREG32(mmSH_MEM_CONFIG, tmp);
3604                 } else {
3605                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3606                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3607                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3608                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3609                         WREG32(mmSH_MEM_CONFIG, tmp);
3610                 }
3611
3612                 WREG32(mmSH_MEM_APE1_BASE, 1);
3613                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3614                 WREG32(mmSH_MEM_BASES, 0);
3615         }
3616         vi_srbm_select(adev, 0, 0, 0, 0);
3617         mutex_unlock(&adev->srbm_mutex);
3618
3619         gfx_v8_0_init_compute_vmid(adev);
3620
3621         mutex_lock(&adev->grbm_idx_mutex);
3622         /*
3623          * make sure that the following register writes are broadcast
3624          * to all the shader engines
3625          */
3626         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3627
3628         WREG32(mmPA_SC_FIFO_SIZE,
3629                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3630                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3631                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3632                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3633                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3634                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3635                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3636                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3637         mutex_unlock(&adev->grbm_idx_mutex);
3638
3639 }
3640
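/*
 * Poll the RLC SERDES busy status for every SE/SH, and then for the
 * non-CU masters, until they all report idle or the usec timeout expires.
 */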
3641 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3642 {
3643         u32 i, j, k;
3644         u32 mask;
3645
3646         mutex_lock(&adev->grbm_idx_mutex);
3647         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3648                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3649                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3650                         for (k = 0; k < adev->usec_timeout; k++) {
3651                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3652                                         break;
3653                                 udelay(1);
3654                         }
3655                 }
3656         }
3657         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3658         mutex_unlock(&adev->grbm_idx_mutex);
3659
3660         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3661                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3662                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3663                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3664         for (k = 0; k < adev->usec_timeout; k++) {
3665                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3666                         break;
3667                 udelay(1);
3668         }
3669 }
3670
3671 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3672                                                bool enable)
3673 {
3674         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3675
3676         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3677         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3678         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3679         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3680
3681         WREG32(mmCP_INT_CNTL_RING0, tmp);
3682 }
3683
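/* point the RLC at the clear state buffer (CSIB address and length) */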
3684 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3685 {
3686         /* csib */
3687         WREG32(mmRLC_CSIB_ADDR_HI,
3688                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3689         WREG32(mmRLC_CSIB_ADDR_LO,
3690                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3691         WREG32(mmRLC_CSIB_LENGTH,
3692                         adev->gfx.rlc.clear_state_size);
3693 }
3694
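/*
 * Walk the RLC indirect register list: record where each block starts
 * (blocks are terminated by 0xFFFFFFFF), collect the unique index values
 * found in the third dword of each entry, and rewrite those dwords to
 * reference the unique_indices table instead.
 */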
3695 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3696                                 int ind_offset,
3697                                 int list_size,
3698                                 int *unique_indices,
3699                                 int *indices_count,
3700                                 int max_indices,
3701                                 int *ind_start_offsets,
3702                                 int *offset_count,
3703                                 int max_offset)
3704 {
3705         int indices;
3706         bool new_entry = true;
3707
3708         for (; ind_offset < list_size; ind_offset++) {
3709
3710                 if (new_entry) {
3711                         new_entry = false;
3712                         ind_start_offsets[*offset_count] = ind_offset;
3713                         *offset_count = *offset_count + 1;
3714                         BUG_ON(*offset_count >= max_offset);
3715                 }
3716
3717                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3718                         new_entry = true;
3719                         continue;
3720                 }
3721
3722                 ind_offset += 2;
3723
3724                 /* look for a matching index */
3725                 for (indices = 0;
3726                         indices < *indices_count;
3727                         indices++) {
3728                         if (unique_indices[indices] ==
3729                                 register_list_format[ind_offset])
3730                                 break;
3731                 }
3732
3733                 if (indices >= *indices_count) {
3734                         unique_indices[*indices_count] =
3735                                 register_list_format[ind_offset];
3736                         indices = *indices_count;
3737                         *indices_count = *indices_count + 1;
3738                         BUG_ON(*indices_count >= max_indices);
3739                 }
3740
3741                 register_list_format[ind_offset] = indices;
3742         }
3743 }
3744
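/*
 * Program the RLC save/restore machinery: enable auto-increment
 * addressing, load the direct register restore list into SRM ARAM,
 * load the (index-remapped) indirect list plus its block starting
 * offsets into GPM scratch, and program the unique index registers
 * into the SRM index control address/data pairs.
 */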
3745 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3746 {
3747         int i, temp, data;
3748         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3749         int indices_count = 0;
3750         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3751         int offset_count = 0;
3752
3753         int list_size;
3754         unsigned int *register_list_format =
3755                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3756         if (register_list_format == NULL)
3757                 return -ENOMEM;
3758         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3759                         adev->gfx.rlc.reg_list_format_size_bytes);
3760
3761         gfx_v8_0_parse_ind_reg_list(register_list_format,
3762                                 RLC_FormatDirectRegListLength,
3763                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3764                                 unique_indices,
3765                                 &indices_count,
3766                                 ARRAY_SIZE(unique_indices),
3767                                 indirect_start_offsets,
3768                                 &offset_count,
3769                                 ARRAY_SIZE(indirect_start_offsets));
3770
3771         /* save and restore list */
3772         temp = RREG32(mmRLC_SRM_CNTL);
3773         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3774         WREG32(mmRLC_SRM_CNTL, temp);
3775
3776         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3777         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3778                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3779
3780         /* indirect list */
3781         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3782         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3783                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3784
3785         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3786         list_size = list_size >> 1;
3787         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3788         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3789
3790         /* indirect list starting offsets */
3791         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3792                 adev->gfx.rlc.starting_offsets_start);
3793         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3794                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3795                                 indirect_start_offsets[i]);
3796
3797         /* unique indices */
3798         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3799         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3800         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3801                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3802                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3803         }
3804         kfree(register_list_format);
3805
3806         return 0;
3807 }
3808
3809 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3810 {
3811         uint32_t data;
3812
3813         data = RREG32(mmRLC_SRM_CNTL);
3814         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3815         WREG32(mmRLC_SRM_CNTL, data);
3816 }
3817
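/*
 * When gfx power gating is supported, program the CP WPTR poll interval,
 * the RLC power up/down and SERDES command delays, and the GRBM register
 * save idle threshold.
 */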
3818 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3819 {
3820         uint32_t data;
3821
3822         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3823                               AMD_PG_SUPPORT_GFX_SMG |
3824                               AMD_PG_SUPPORT_GFX_DMG)) {
3825                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3826                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3827                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3828                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3829
3830                 data = 0;
3831                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3832                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3833                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3834                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3835                 WREG32(mmRLC_PG_DELAY, data);
3836
3837                 data = RREG32(mmRLC_PG_DELAY_2);
3838                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3839                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3840                 WREG32(mmRLC_PG_DELAY_2, data);
3841
3842                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3843                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3844                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3845                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3846         }
3847 }
3848
3849 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3850                                                 bool enable)
3851 {
3852         u32 data, orig;
3853
3854         orig = data = RREG32(mmRLC_PG_CNTL);
3855
3856         if (enable)
3857                 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3858         else
3859                 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3860
3861         if (orig != data)
3862                 WREG32(mmRLC_PG_CNTL, data);
3863 }
3864
3865 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3866                                                   bool enable)
3867 {
3868         u32 data, orig;
3869
3870         orig = data = RREG32(mmRLC_PG_CNTL);
3871
3872         if (enable)
3873                 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3874         else
3875                 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3876
3877         if (orig != data)
3878                 WREG32(mmRLC_PG_CNTL, data);
3879 }
3880
3881 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3882 {
3883         u32 data, orig;
3884
3885         orig = data = RREG32(mmRLC_PG_CNTL);
3886
3887         if (enable)
3888                 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3889         else
3890                 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3891
3892         if (orig != data)
3893                 WREG32(mmRLC_PG_CNTL, data);
3894 }
3895
3896 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3897 {
3898         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3899                               AMD_PG_SUPPORT_GFX_SMG |
3900                               AMD_PG_SUPPORT_GFX_DMG |
3901                               AMD_PG_SUPPORT_CP |
3902                               AMD_PG_SUPPORT_GDS |
3903                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3904                 gfx_v8_0_init_csb(adev);
3905                 gfx_v8_0_init_save_restore_list(adev);
3906                 gfx_v8_0_enable_save_restore_machine(adev);
3907
3908                 if ((adev->asic_type == CHIP_CARRIZO) ||
3909                     (adev->asic_type == CHIP_STONEY)) {
3910                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3911                         gfx_v8_0_init_power_gating(adev);
3912                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3913                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3914                                 cz_enable_sck_slow_down_on_power_up(adev, true);
3915                                 cz_enable_sck_slow_down_on_power_down(adev, true);
3916                         } else {
3917                                 cz_enable_sck_slow_down_on_power_up(adev, false);
3918                                 cz_enable_sck_slow_down_on_power_down(adev, false);
3919                         }
3920                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3921                                 cz_enable_cp_power_gating(adev, true);
3922                         else
3923                                 cz_enable_cp_power_gating(adev, false);
3924                 } else if (adev->asic_type == CHIP_POLARIS11) {
3925                         gfx_v8_0_init_power_gating(adev);
3926                 }
3927         }
3928 }
3929
3930 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3931 {
3932         u32 tmp = RREG32(mmRLC_CNTL);
3933
3934         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3935         WREG32(mmRLC_CNTL, tmp);
3936
3937         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3938
3939         gfx_v8_0_wait_for_rlc_serdes(adev);
3940 }
3941
3942 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3943 {
3944         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3945
3946         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3947         WREG32(mmGRBM_SOFT_RESET, tmp);
3948         udelay(50);
3949         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3950         WREG32(mmGRBM_SOFT_RESET, tmp);
3951         udelay(50);
3952 }
3953
3954 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3955 {
3956         u32 tmp = RREG32(mmRLC_CNTL);
3957
3958         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3959         WREG32(mmRLC_CNTL, tmp);
3960
3961         /* APUs (e.g. Carrizo) enable this interrupt only after the CP is initialized */
3962         if (!(adev->flags & AMD_IS_APU))
3963                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3964
3965         udelay(50);
3966 }
3967
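/* legacy (non-SMU) RLC microcode load through RLC_GPM_UCODE_ADDR/DATA */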
3968 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3969 {
3970         const struct rlc_firmware_header_v2_0 *hdr;
3971         const __le32 *fw_data;
3972         unsigned i, fw_size;
3973
3974         if (!adev->gfx.rlc_fw)
3975                 return -EINVAL;
3976
3977         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3978         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3979
3980         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3981                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3982         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3983
3984         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3985         for (i = 0; i < fw_size; i++)
3986                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3987         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3988
3989         return 0;
3990 }
3991
3992 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3993 {
3994         int r;
3995
3996         gfx_v8_0_rlc_stop(adev);
3997
3998         /* disable CG */
3999         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
4000         if (adev->asic_type == CHIP_POLARIS11 ||
4001                 adev->asic_type == CHIP_POLARIS10)
4002                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
4003
4004         /* disable PG */
4005         WREG32(mmRLC_PG_CNTL, 0);
4006
4007         gfx_v8_0_rlc_reset(adev);
4008
4009         gfx_v8_0_init_pg(adev);
4010
4011         if (!adev->pp_enabled) {
4012                 if (!adev->firmware.smu_load) {
4013                         /* legacy rlc firmware loading */
4014                         r = gfx_v8_0_rlc_load_microcode(adev);
4015                         if (r)
4016                                 return r;
4017                 } else {
4018                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4019                                                         AMDGPU_UCODE_ID_RLC_G);
4020                         if (r)
4021                                 return -EINVAL;
4022                 }
4023         }
4024
4025         gfx_v8_0_rlc_start(adev);
4026
4027         return 0;
4028 }
4029
4030 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4031 {
4032         int i;
4033         u32 tmp = RREG32(mmCP_ME_CNTL);
4034
4035         if (enable) {
4036                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4037                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4038                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4039         } else {
4040                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4041                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4042                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4043                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4044                         adev->gfx.gfx_ring[i].ready = false;
4045         }
4046         WREG32(mmCP_ME_CNTL, tmp);
4047         udelay(50);
4048 }
4049
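/*
 * Load the PFP, CE and ME microcode: halt the gfx CP, then stream each
 * image through its UCODE_ADDR/DATA (ME_RAM_WADDR/DATA) port.
 */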
4050 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4051 {
4052         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4053         const struct gfx_firmware_header_v1_0 *ce_hdr;
4054         const struct gfx_firmware_header_v1_0 *me_hdr;
4055         const __le32 *fw_data;
4056         unsigned i, fw_size;
4057
4058         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4059                 return -EINVAL;
4060
4061         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4062                 adev->gfx.pfp_fw->data;
4063         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4064                 adev->gfx.ce_fw->data;
4065         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4066                 adev->gfx.me_fw->data;
4067
4068         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4069         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4070         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4071
4072         gfx_v8_0_cp_gfx_enable(adev, false);
4073
4074         /* PFP */
4075         fw_data = (const __le32 *)
4076                 (adev->gfx.pfp_fw->data +
4077                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4078         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4079         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4080         for (i = 0; i < fw_size; i++)
4081                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4082         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4083
4084         /* CE */
4085         fw_data = (const __le32 *)
4086                 (adev->gfx.ce_fw->data +
4087                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4088         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4089         WREG32(mmCP_CE_UCODE_ADDR, 0);
4090         for (i = 0; i < fw_size; i++)
4091                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4092         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4093
4094         /* ME */
4095         fw_data = (const __le32 *)
4096                 (adev->gfx.me_fw->data +
4097                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4098         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4099         WREG32(mmCP_ME_RAM_WADDR, 0);
4100         for (i = 0; i < fw_size; i++)
4101                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4102         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4103
4104         return 0;
4105 }
4106
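/*
 * Count the dwords of the clear state PM4 stream emitted by
 * gfx_v8_0_cp_gfx_start(): the preamble begin/end, context control,
 * one SET_CONTEXT_REG packet (2 + reg_count dwords) per context extent,
 * the two PA_SC_RASTER_CONFIG values and the final CLEAR_STATE packet.
 */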
4107 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4108 {
4109         u32 count = 0;
4110         const struct cs_section_def *sect = NULL;
4111         const struct cs_extent_def *ext = NULL;
4112
4113         /* begin clear state */
4114         count += 2;
4115         /* context control state */
4116         count += 3;
4117
4118         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4119                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4120                         if (sect->id == SECT_CONTEXT)
4121                                 count += 2 + ext->reg_count;
4122                         else
4123                                 return 0;
4124                 }
4125         }
4126         /* pa_sc_raster_config/pa_sc_raster_config1 */
4127         count += 4;
4128         /* end clear state */
4129         count += 2;
4130         /* clear state */
4131         count += 2;
4132
4133         return count;
4134 }
4135
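/*
 * Initialize the gfx CP and emit the clear state: program
 * CP_MAX_CONTEXT/CP_ENDIAN_SWAP/CP_DEVICE_ID, un-halt the CP, then write
 * the clear-state preamble, the vi_cs_data context register defaults, the
 * per-ASIC PA_SC_RASTER_CONFIG values, a CLEAR_STATE packet and the CE
 * partition bases to ring 0.
 */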
4136 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4137 {
4138         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4139         const struct cs_section_def *sect = NULL;
4140         const struct cs_extent_def *ext = NULL;
4141         int r, i;
4142
4143         /* init the CP */
4144         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4145         WREG32(mmCP_ENDIAN_SWAP, 0);
4146         WREG32(mmCP_DEVICE_ID, 1);
4147
4148         gfx_v8_0_cp_gfx_enable(adev, true);
4149
4150         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4151         if (r) {
4152                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4153                 return r;
4154         }
4155
4156         /* clear state buffer */
4157         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4158         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4159
4160         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4161         amdgpu_ring_write(ring, 0x80000000);
4162         amdgpu_ring_write(ring, 0x80000000);
4163
4164         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4165                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4166                         if (sect->id == SECT_CONTEXT) {
4167                                 amdgpu_ring_write(ring,
4168                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4169                                                ext->reg_count));
4170                                 amdgpu_ring_write(ring,
4171                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4172                                 for (i = 0; i < ext->reg_count; i++)
4173                                         amdgpu_ring_write(ring, ext->extent[i]);
4174                         }
4175                 }
4176         }
4177
4178         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4179         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4180         switch (adev->asic_type) {
4181         case CHIP_TONGA:
4182         case CHIP_POLARIS10:
4183                 amdgpu_ring_write(ring, 0x16000012);
4184                 amdgpu_ring_write(ring, 0x0000002A);
4185                 break;
4186         case CHIP_POLARIS11:
4187                 amdgpu_ring_write(ring, 0x16000012);
4188                 amdgpu_ring_write(ring, 0x00000000);
4189                 break;
4190         case CHIP_FIJI:
4191                 amdgpu_ring_write(ring, 0x3a00161a);
4192                 amdgpu_ring_write(ring, 0x0000002e);
4193                 break;
4194         case CHIP_CARRIZO:
4195                 amdgpu_ring_write(ring, 0x00000002);
4196                 amdgpu_ring_write(ring, 0x00000000);
4197                 break;
4198         case CHIP_TOPAZ:
4199                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4200                                 0x00000000 : 0x00000002);
4201                 amdgpu_ring_write(ring, 0x00000000);
4202                 break;
4203         case CHIP_STONEY:
4204                 amdgpu_ring_write(ring, 0x00000000);
4205                 amdgpu_ring_write(ring, 0x00000000);
4206                 break;
4207         default:
4208                 BUG();
4209         }
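        /*
         * The per-ASIC values above are the golden settings for
         * PA_SC_RASTER_CONFIG and PA_SC_RASTER_CONFIG_1; they presumably
         * encode the shader-engine/render-backend raster mapping for each
         * chip, which is why parts with a single RB (Topaz with
         * num_rbs == 1, Stoney) program 0.
         */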
4210
4211         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4212         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4213
4214         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4215         amdgpu_ring_write(ring, 0);
4216
4217         /* init the CE partitions */
4218         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4219         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4220         amdgpu_ring_write(ring, 0x8000);
4221         amdgpu_ring_write(ring, 0x8000);
4222
4223         amdgpu_ring_commit(ring);
4224
4225         return 0;
4226 }
4227
4228 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4229 {
4230         struct amdgpu_ring *ring;
4231         u32 tmp;
4232         u32 rb_bufsz;
4233         u64 rb_addr, rptr_addr;
4234         int r;
4235
4236         /* Set the write pointer delay */
4237         WREG32(mmCP_RB_WPTR_DELAY, 0);
4238
4239         /* set the RB to use vmid 0 */
4240         WREG32(mmCP_RB_VMID, 0);
4241
4242         /* Set ring buffer size */
4243         ring = &adev->gfx.gfx_ring[0];
4244         rb_bufsz = order_base_2(ring->ring_size / 8);
4245         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4246         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4247         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4248         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4249 #ifdef __BIG_ENDIAN
4250         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4251 #endif
4252         WREG32(mmCP_RB0_CNTL, tmp);
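        /*
         * RB_BUFSZ is programmed as order_base_2(ring_size / 8), i.e. the
         * log2 of the ring size in 8-byte units, with RB_BLKSZ kept two
         * steps smaller.  BUF_SWAP is set on big-endian builds, presumably
         * so the CP byte-swaps its ring fetches.
         */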
4253
4254         /* Initialize the ring buffer's read and write pointers */
4255         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4256         ring->wptr = 0;
4257         WREG32(mmCP_RB0_WPTR, ring->wptr);
4258
4259         /* set the wb address whether it's enabled or not */
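        /* (The rptr writeback address is programmed unconditionally; the
         * CP simply ignores it if writeback is not used.) */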
4260         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4261         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4262         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4263
4264         mdelay(1);
4265         WREG32(mmCP_RB0_CNTL, tmp);
4266
4267         rb_addr = ring->gpu_addr >> 8;
4268         WREG32(mmCP_RB0_BASE, rb_addr);
4269         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4270
4271         /* no gfx doorbells on iceland */
4272         if (adev->asic_type != CHIP_TOPAZ) {
4273                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4274                 if (ring->use_doorbell) {
4275                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4276                                             DOORBELL_OFFSET, ring->doorbell_index);
4277                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4278                                             DOORBELL_HIT, 0);
4279                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4280                                             DOORBELL_EN, 1);
4281                 } else {
4282                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4283                                             DOORBELL_EN, 0);
4284                 }
4285                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4286
4287                 if (adev->asic_type == CHIP_TONGA) {
4288                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4289                                             DOORBELL_RANGE_LOWER,
4290                                             AMDGPU_DOORBELL_GFX_RING0);
4291                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4292
4293                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4294                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4295                 }
4296
4297         }
4298
4299         /* start the ring */
4300         gfx_v8_0_cp_gfx_start(adev);
4301         ring->ready = true;
4302         r = amdgpu_ring_test_ring(ring);
4303         if (r) {
4304                 ring->ready = false;
4305                 return r;
4306         }
4307
4308         return 0;
4309 }
4310
4311 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4312 {
4313         int i;
4314
4315         if (enable) {
4316                 WREG32(mmCP_MEC_CNTL, 0);
4317         } else {
4318                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4319                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4320                         adev->gfx.compute_ring[i].ready = false;
4321         }
4322         udelay(50);
4323 }
4324
4325 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4326 {
4327         const struct gfx_firmware_header_v1_0 *mec_hdr;
4328         const __le32 *fw_data;
4329         unsigned i, fw_size;
4330
4331         if (!adev->gfx.mec_fw)
4332                 return -EINVAL;
4333
4334         gfx_v8_0_cp_compute_enable(adev, false);
4335
4336         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4337         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4338
4339         fw_data = (const __le32 *)
4340                 (adev->gfx.mec_fw->data +
4341                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4342         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4343
4344         /* MEC1 */
4345         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4346         for (i = 0; i < fw_size; i++)
4347                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4348         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4349
4350         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4351         if (adev->gfx.mec2_fw) {
4352                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4353
4354                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4355                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4356
4357                 fw_data = (const __le32 *)
4358                         (adev->gfx.mec2_fw->data +
4359                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4360                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4361
4362                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4363                 for (i = 0; i < fw_size; i++)
4364                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4365                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4366         }
4367
4368         return 0;
4369 }
4370
4371 struct vi_mqd {
4372         uint32_t header;  /* ordinal0 */
4373         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4374         uint32_t compute_dim_x;  /* ordinal2 */
4375         uint32_t compute_dim_y;  /* ordinal3 */
4376         uint32_t compute_dim_z;  /* ordinal4 */
4377         uint32_t compute_start_x;  /* ordinal5 */
4378         uint32_t compute_start_y;  /* ordinal6 */
4379         uint32_t compute_start_z;  /* ordinal7 */
4380         uint32_t compute_num_thread_x;  /* ordinal8 */
4381         uint32_t compute_num_thread_y;  /* ordinal9 */
4382         uint32_t compute_num_thread_z;  /* ordinal10 */
4383         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4384         uint32_t compute_perfcount_enable;  /* ordinal12 */
4385         uint32_t compute_pgm_lo;  /* ordinal13 */
4386         uint32_t compute_pgm_hi;  /* ordinal14 */
4387         uint32_t compute_tba_lo;  /* ordinal15 */
4388         uint32_t compute_tba_hi;  /* ordinal16 */
4389         uint32_t compute_tma_lo;  /* ordinal17 */
4390         uint32_t compute_tma_hi;  /* ordinal18 */
4391         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4392         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4393         uint32_t compute_vmid;  /* ordinal21 */
4394         uint32_t compute_resource_limits;  /* ordinal22 */
4395         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4396         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4397         uint32_t compute_tmpring_size;  /* ordinal25 */
4398         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4399         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4400         uint32_t compute_restart_x;  /* ordinal28 */
4401         uint32_t compute_restart_y;  /* ordinal29 */
4402         uint32_t compute_restart_z;  /* ordinal30 */
4403         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4404         uint32_t compute_misc_reserved;  /* ordinal32 */
4405         uint32_t compute_dispatch_id;  /* ordinal33 */
4406         uint32_t compute_threadgroup_id;  /* ordinal34 */
4407         uint32_t compute_relaunch;  /* ordinal35 */
4408         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4409         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4410         uint32_t compute_wave_restore_control;  /* ordinal38 */
4411         uint32_t reserved9;  /* ordinal39 */
4412         uint32_t reserved10;  /* ordinal40 */
4413         uint32_t reserved11;  /* ordinal41 */
4414         uint32_t reserved12;  /* ordinal42 */
4415         uint32_t reserved13;  /* ordinal43 */
4416         uint32_t reserved14;  /* ordinal44 */
4417         uint32_t reserved15;  /* ordinal45 */
4418         uint32_t reserved16;  /* ordinal46 */
4419         uint32_t reserved17;  /* ordinal47 */
4420         uint32_t reserved18;  /* ordinal48 */
4421         uint32_t reserved19;  /* ordinal49 */
4422         uint32_t reserved20;  /* ordinal50 */
4423         uint32_t reserved21;  /* ordinal51 */
4424         uint32_t reserved22;  /* ordinal52 */
4425         uint32_t reserved23;  /* ordinal53 */
4426         uint32_t reserved24;  /* ordinal54 */
4427         uint32_t reserved25;  /* ordinal55 */
4428         uint32_t reserved26;  /* ordinal56 */
4429         uint32_t reserved27;  /* ordinal57 */
4430         uint32_t reserved28;  /* ordinal58 */
4431         uint32_t reserved29;  /* ordinal59 */
4432         uint32_t reserved30;  /* ordinal60 */
4433         uint32_t reserved31;  /* ordinal61 */
4434         uint32_t reserved32;  /* ordinal62 */
4435         uint32_t reserved33;  /* ordinal63 */
4436         uint32_t reserved34;  /* ordinal64 */
4437         uint32_t compute_user_data_0;  /* ordinal65 */
4438         uint32_t compute_user_data_1;  /* ordinal66 */
4439         uint32_t compute_user_data_2;  /* ordinal67 */
4440         uint32_t compute_user_data_3;  /* ordinal68 */
4441         uint32_t compute_user_data_4;  /* ordinal69 */
4442         uint32_t compute_user_data_5;  /* ordinal70 */
4443         uint32_t compute_user_data_6;  /* ordinal71 */
4444         uint32_t compute_user_data_7;  /* ordinal72 */
4445         uint32_t compute_user_data_8;  /* ordinal73 */
4446         uint32_t compute_user_data_9;  /* ordinal74 */
4447         uint32_t compute_user_data_10;  /* ordinal75 */
4448         uint32_t compute_user_data_11;  /* ordinal76 */
4449         uint32_t compute_user_data_12;  /* ordinal77 */
4450         uint32_t compute_user_data_13;  /* ordinal78 */
4451         uint32_t compute_user_data_14;  /* ordinal79 */
4452         uint32_t compute_user_data_15;  /* ordinal80 */
4453         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4454         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4455         uint32_t reserved35;  /* ordinal83 */
4456         uint32_t reserved36;  /* ordinal84 */
4457         uint32_t reserved37;  /* ordinal85 */
4458         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4459         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4460         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4461         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4462         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4463         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4464         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4465         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4466         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4467         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4468         uint32_t reserved38;  /* ordinal96 */
4469         uint32_t reserved39;  /* ordinal97 */
4470         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4471         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4472         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4473         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4474         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4475         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4476         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4477         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4478         uint32_t reserved40;  /* ordinal106 */
4479         uint32_t reserved41;  /* ordinal107 */
4480         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4481         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4482         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4483         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4484         uint32_t reserved42;  /* ordinal112 */
4485         uint32_t reserved43;  /* ordinal113 */
4486         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4487         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4488         uint32_t cp_packet_id_lo;  /* ordinal116 */
4489         uint32_t cp_packet_id_hi;  /* ordinal117 */
4490         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4491         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4492         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4493         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4494         uint32_t gds_save_mask_lo;  /* ordinal122 */
4495         uint32_t gds_save_mask_hi;  /* ordinal123 */
4496         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4497         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4498         uint32_t reserved44;  /* ordinal126 */
4499         uint32_t reserved45;  /* ordinal127 */
4500         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4501         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4502         uint32_t cp_hqd_active;  /* ordinal130 */
4503         uint32_t cp_hqd_vmid;  /* ordinal131 */
4504         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4505         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4506         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4507         uint32_t cp_hqd_quantum;  /* ordinal135 */
4508         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4509         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4510         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4511         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4512         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4513         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4514         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4515         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4516         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4517         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4518         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4519         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4520         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4521         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4522         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4523         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4524         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4525         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4526         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4527         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4528         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4529         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4530         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4531         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4532         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4533         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4534         uint32_t cp_mqd_control;  /* ordinal162 */
4535         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4536         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4537         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4538         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4539         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4540         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4541         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4542         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4543         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4544         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4545         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4546         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4547         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4548         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4549         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4550         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4551         uint32_t cp_hqd_error;  /* ordinal179 */
4552         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4553         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4554         uint32_t reserved46;  /* ordinal182 */
4555         uint32_t reserved47;  /* ordinal183 */
4556         uint32_t reserved48;  /* ordinal184 */
4557         uint32_t reserved49;  /* ordinal185 */
4558         uint32_t reserved50;  /* ordinal186 */
4559         uint32_t reserved51;  /* ordinal187 */
4560         uint32_t reserved52;  /* ordinal188 */
4561         uint32_t reserved53;  /* ordinal189 */
4562         uint32_t reserved54;  /* ordinal190 */
4563         uint32_t reserved55;  /* ordinal191 */
4564         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4565         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4566         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4567         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4568         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4569         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4570         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4571         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4572         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4573         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4574         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4575         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4576         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4577         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4578         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4579         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4580         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4581         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4582         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4583         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4584         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4585         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4586         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4587         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4588         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4589         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4590         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4591         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4592         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4593         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4594         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4595         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4596         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4597         uint32_t reserved56;  /* ordinal225 */
4598         uint32_t reserved57;  /* ordinal226 */
4599         uint32_t reserved58;  /* ordinal227 */
4600         uint32_t set_resources_header;  /* ordinal228 */
4601         uint32_t set_resources_dw1;  /* ordinal229 */
4602         uint32_t set_resources_dw2;  /* ordinal230 */
4603         uint32_t set_resources_dw3;  /* ordinal231 */
4604         uint32_t set_resources_dw4;  /* ordinal232 */
4605         uint32_t set_resources_dw5;  /* ordinal233 */
4606         uint32_t set_resources_dw6;  /* ordinal234 */
4607         uint32_t set_resources_dw7;  /* ordinal235 */
4608         uint32_t reserved59;  /* ordinal236 */
4609         uint32_t reserved60;  /* ordinal237 */
4610         uint32_t reserved61;  /* ordinal238 */
4611         uint32_t reserved62;  /* ordinal239 */
4612         uint32_t reserved63;  /* ordinal240 */
4613         uint32_t reserved64;  /* ordinal241 */
4614         uint32_t reserved65;  /* ordinal242 */
4615         uint32_t reserved66;  /* ordinal243 */
4616         uint32_t reserved67;  /* ordinal244 */
4617         uint32_t reserved68;  /* ordinal245 */
4618         uint32_t reserved69;  /* ordinal246 */
4619         uint32_t reserved70;  /* ordinal247 */
4620         uint32_t reserved71;  /* ordinal248 */
4621         uint32_t reserved72;  /* ordinal249 */
4622         uint32_t reserved73;  /* ordinal250 */
4623         uint32_t reserved74;  /* ordinal251 */
4624         uint32_t reserved75;  /* ordinal252 */
4625         uint32_t reserved76;  /* ordinal253 */
4626         uint32_t reserved77;  /* ordinal254 */
4627         uint32_t reserved78;  /* ordinal255 */
4628
4629         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4630 };
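/*
 * vi_mqd is the memory queue descriptor for a VI compute queue: a
 * GPU-visible shadow of the queue's state.  The cp_hqd_ and cp_mqd_
 * ordinals mirror the corresponding CP_HQD_ and CP_MQD_ registers
 * programmed below, so the command processor can find and (re)initialize
 * the hardware queue from memory; the trailing 256-dword area is reserved
 * for the CP microcode, as the field comment notes.
 */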
4631
4632 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4633 {
4634         int i, r;
4635
4636         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4637                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4638
4639                 if (ring->mqd_obj) {
4640                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4641                         if (unlikely(r != 0))
4642                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4643
4644                         amdgpu_bo_unpin(ring->mqd_obj);
4645                         amdgpu_bo_unreserve(ring->mqd_obj);
4646
4647                         amdgpu_bo_unref(&ring->mqd_obj);
4648                         ring->mqd_obj = NULL;
4649                 }
4650         }
4651 }
4652
4653 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4654 {
4655         int r, i, j;
4656         u32 tmp;
4657         bool use_doorbell = true;
4658         u64 hqd_gpu_addr;
4659         u64 mqd_gpu_addr;
4660         u64 eop_gpu_addr;
4661         u64 wb_gpu_addr;
4662         u32 *buf;
4663         struct vi_mqd *mqd;
4664
4665         /* init the pipes */
4666         mutex_lock(&adev->srbm_mutex);
4667         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4668                 int me = (i < 4) ? 1 : 2;
4669                 int pipe = (i < 4) ? i : (i - 4);
4670
4671                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4672                 eop_gpu_addr >>= 8;
4673
4674                 vi_srbm_select(adev, me, pipe, 0, 0);
4675
4676                 /* write the EOP addr */
4677                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4678                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4679
4680                 /* set the VMID assigned */
4681                 WREG32(mmCP_HQD_VMID, 0);
4682
4683                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4684                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4685                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4686                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4687                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4688         }
4689         vi_srbm_select(adev, 0, 0, 0, 0);
4690         mutex_unlock(&adev->srbm_mutex);
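        /*
         * The loop above touched every MEC pipe (num_mec * num_pipe):
         * pipes 0-3 select ME 1 and pipes 4-7 select ME 2 via
         * vi_srbm_select().  Each pipe gets its slice of the shared EOP
         * buffer (hpd_eop_gpu_addr, shifted right by 8 since the register
         * holds the address in 256-byte units) and an EOP_SIZE of
         * order_base_2(MEC_HPD_SIZE / 4) - 1, which with the 2^(n+1)
         * dword encoding works out to MEC_HPD_SIZE bytes per pipe.
         */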
4691
4692         /* init the compute queues */
4693         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4694                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4695
4696                 if (ring->mqd_obj == NULL) {
4697                         r = amdgpu_bo_create(adev,
4698                                              sizeof(struct vi_mqd),
4699                                              PAGE_SIZE, true,
4700                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4701                                              NULL, &ring->mqd_obj);
4702                         if (r) {
4703                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4704                                 return r;
4705                         }
4706                 }
4707
4708                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4709                 if (unlikely(r != 0)) {
4710                         gfx_v8_0_cp_compute_fini(adev);
4711                         return r;
4712                 }
4713                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4714                                   &mqd_gpu_addr);
4715                 if (r) {
4716                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4717                         gfx_v8_0_cp_compute_fini(adev);
4718                         return r;
4719                 }
4720                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4721                 if (r) {
4722                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4723                         gfx_v8_0_cp_compute_fini(adev);
4724                         return r;
4725                 }
4726
4727                 /* init the mqd struct */
4728                 memset(buf, 0, sizeof(struct vi_mqd));
4729
4730                 mqd = (struct vi_mqd *)buf;
4731                 mqd->header = 0xC0310800;
4732                 mqd->compute_pipelinestat_enable = 0x00000001;
4733                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4734                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4735                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4736                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4737                 mqd->compute_misc_reserved = 0x00000003;
4738
4739                 mutex_lock(&adev->srbm_mutex);
4740                 vi_srbm_select(adev, ring->me,
4741                                ring->pipe,
4742                                ring->queue, 0);
4743
4744                 /* disable wptr polling */
4745                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4746                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4747                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4748
4749                 mqd->cp_hqd_eop_base_addr_lo =
4750                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4751                 mqd->cp_hqd_eop_base_addr_hi =
4752                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4753
4754                 /* enable doorbell? */
4755                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4756                 if (use_doorbell) {
4757                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4758                 } else {
4759                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4760                 }
4761                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4762                 mqd->cp_hqd_pq_doorbell_control = tmp;
4763
4764                 /* disable the queue if it's active */
4765                 mqd->cp_hqd_dequeue_request = 0;
4766                 mqd->cp_hqd_pq_rptr = 0;
4767                 mqd->cp_hqd_pq_wptr = 0;
4768                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4769                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4770                         for (j = 0; j < adev->usec_timeout; j++) {
4771                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4772                                         break;
4773                                 udelay(1);
4774                         }
4775                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4776                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4777                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4778                 }
4779
4780                 /* set the pointer to the MQD */
4781                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4782                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4783                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4784                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4785
4786                 /* set MQD vmid to 0 */
4787                 tmp = RREG32(mmCP_MQD_CONTROL);
4788                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4789                 WREG32(mmCP_MQD_CONTROL, tmp);
4790                 mqd->cp_mqd_control = tmp;
4791
4792                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4793                 hqd_gpu_addr = ring->gpu_addr >> 8;
4794                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4795                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4796                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4797                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4798
4799                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4800                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4801                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4802                                     (order_base_2(ring->ring_size / 4) - 1));
4803                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4804                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4805 #ifdef __BIG_ENDIAN
4806                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4807 #endif
4808                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4809                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4810                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4811                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4812                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4813                 mqd->cp_hqd_pq_control = tmp;
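                /*
                 * QUEUE_SIZE is order_base_2(ring_size / 4) - 1, i.e. the
                 * log2 of the ring size in dwords minus one (the same
                 * 2^(n+1) style encoding noted for EOP_CONTROL above), and
                 * RPTR_BLOCK_SIZE is derived from the GPU page size.
                 * PRIV_STATE and KMD_QUEUE presumably mark this as a
                 * privileged, kernel-managed queue.
                 */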
4814
4815                 /* set the wb address whether it's enabled or not */
4816                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4817                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4818                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4819                         upper_32_bits(wb_gpu_addr) & 0xffff;
4820                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4821                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4822                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4823                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4824
4825                 /* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4826                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4827                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4828                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4829                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4830                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4831                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4832
4833                 /* enable the doorbell if requested */
4834                 if (use_doorbell) {
4835                         if ((adev->asic_type == CHIP_CARRIZO) ||
4836                             (adev->asic_type == CHIP_FIJI) ||
4837                             (adev->asic_type == CHIP_STONEY) ||
4838                             (adev->asic_type == CHIP_POLARIS11) ||
4839                             (adev->asic_type == CHIP_POLARIS10)) {
4840                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4841                                        AMDGPU_DOORBELL_KIQ << 2);
4842                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4843                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4844                         }
4845                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4846                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4847                                             DOORBELL_OFFSET, ring->doorbell_index);
4848                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4849                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4850                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4851                         mqd->cp_hqd_pq_doorbell_control = tmp;
4852
4853                 } else {
4854                         mqd->cp_hqd_pq_doorbell_control = 0;
4855                 }
4856                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4857                        mqd->cp_hqd_pq_doorbell_control);
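                /*
                 * DOORBELL_OFFSET selects this ring's doorbell slot.  On
                 * the ASICs listed above the MEC doorbell aperture is also
                 * programmed to span AMDGPU_DOORBELL_KIQ through
                 * AMDGPU_DOORBELL_MEC_RING7 (the << 2 presumably converts
                 * a dword doorbell index into a byte offset).
                 */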
4858
4859                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4860                 ring->wptr = 0;
4861                 mqd->cp_hqd_pq_wptr = ring->wptr;
4862                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4863                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4864
4865                 /* set the vmid for the queue */
4866                 mqd->cp_hqd_vmid = 0;
4867                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4868
4869                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4870                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4871                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4872                 mqd->cp_hqd_persistent_state = tmp;
4873                 if (adev->asic_type == CHIP_STONEY ||
4874                         adev->asic_type == CHIP_POLARIS11 ||
4875                         adev->asic_type == CHIP_POLARIS10) {
4876                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4877                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4878                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4879                 }
4880
4881                 /* activate the queue */
4882                 mqd->cp_hqd_active = 1;
4883                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4884
4885                 vi_srbm_select(adev, 0, 0, 0, 0);
4886                 mutex_unlock(&adev->srbm_mutex);
4887
4888                 amdgpu_bo_kunmap(ring->mqd_obj);
4889                 amdgpu_bo_unreserve(ring->mqd_obj);
4890         }
4891
4892         if (use_doorbell) {
4893                 tmp = RREG32(mmCP_PQ_STATUS);
4894                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4895                 WREG32(mmCP_PQ_STATUS, tmp);
4896         }
4897
4898         gfx_v8_0_cp_compute_enable(adev, true);
4899
4900         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4901                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4902
4903                 ring->ready = true;
4904                 r = amdgpu_ring_test_ring(ring);
4905                 if (r)
4906                         ring->ready = false;
4907         }
4908
4909         return 0;
4910 }
4911
4912 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4913 {
4914         int r;
4915
4916         if (!(adev->flags & AMD_IS_APU))
4917                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4918
4919         if (!adev->pp_enabled) {
4920                 if (!adev->firmware.smu_load) {
4921                         /* legacy firmware loading */
4922                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4923                         if (r)
4924                                 return r;
4925
4926                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4927                         if (r)
4928                                 return r;
4929                 } else {
4930                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4931                                                         AMDGPU_UCODE_ID_CP_CE);
4932                         if (r)
4933                                 return -EINVAL;
4934
4935                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4936                                                         AMDGPU_UCODE_ID_CP_PFP);
4937                         if (r)
4938                                 return -EINVAL;
4939
4940                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4941                                                         AMDGPU_UCODE_ID_CP_ME);
4942                         if (r)
4943                                 return -EINVAL;
4944
4945                         if (adev->asic_type == CHIP_TOPAZ) {
4946                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4947                                 if (r)
4948                                         return r;
4949                         } else {
4950                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4951                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4952                                 if (r)
4953                                         return -EINVAL;
4954                         }
4955                 }
4956         }
4957
4958         r = gfx_v8_0_cp_gfx_resume(adev);
4959         if (r)
4960                 return r;
4961
4962         r = gfx_v8_0_cp_compute_resume(adev);
4963         if (r)
4964                 return r;
4965
4966         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4967
4968         return 0;
4969 }
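/*
 * Two firmware paths exist here: without powerplay and without the SMU
 * loader, the driver writes the CP microcode into the ucode registers
 * itself (the legacy path above); with the SMU loader it only polls the
 * SMU for each firmware's load-finished status, except on Topaz where
 * the MEC firmware is still loaded directly.  With powerplay enabled,
 * firmware loading is presumably handled by the powerplay stack.
 */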
4970
4971 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4972 {
4973         gfx_v8_0_cp_gfx_enable(adev, enable);
4974         gfx_v8_0_cp_compute_enable(adev, enable);
4975 }
4976
4977 static int gfx_v8_0_hw_init(void *handle)
4978 {
4979         int r;
4980         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4981
4982         gfx_v8_0_init_golden_registers(adev);
4983
4984         gfx_v8_0_gpu_init(adev);
4985
4986         r = gfx_v8_0_rlc_resume(adev);
4987         if (r)
4988                 return r;
4989
4990         r = gfx_v8_0_cp_resume(adev);
4991         if (r)
4992                 return r;
4993
4994         return r;
4995 }
4996
4997 static int gfx_v8_0_hw_fini(void *handle)
4998 {
4999         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5000
5001         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5002         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5003         gfx_v8_0_cp_enable(adev, false);
5004         gfx_v8_0_rlc_stop(adev);
5005         gfx_v8_0_cp_compute_fini(adev);
5006
5007         amdgpu_set_powergating_state(adev,
5008                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5009
5010         return 0;
5011 }
5012
5013 static int gfx_v8_0_suspend(void *handle)
5014 {
5015         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5016
5017         return gfx_v8_0_hw_fini(adev);
5018 }
5019
5020 static int gfx_v8_0_resume(void *handle)
5021 {
5022         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5023
5024         return gfx_v8_0_hw_init(adev);
5025 }
5026
5027 static bool gfx_v8_0_is_idle(void *handle)
5028 {
5029         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5030
5031         return !REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE);
5035 }
5036
5037 static int gfx_v8_0_wait_for_idle(void *handle)
5038 {
5039         unsigned i;
5040         u32 tmp;
5041         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042
5043         for (i = 0; i < adev->usec_timeout; i++) {
5044                 /* read GRBM_STATUS */
5045                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5046
5047                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5048                         return 0;
5049                 udelay(1);
5050         }
5051         return -ETIMEDOUT;
5052 }
5053
5054 static int gfx_v8_0_soft_reset(void *handle)
5055 {
5056         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5057         u32 tmp;
5058         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5059
5060         /* GRBM_STATUS */
5061         tmp = RREG32(mmGRBM_STATUS);
5062         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5063                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5064                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5065                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5066                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5067                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5068                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5069                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5070                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5071                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5072         }
5073
5074         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5075                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5076                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5077                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5078                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5079         }
5080
5081         /* GRBM_STATUS2 */
5082         tmp = RREG32(mmGRBM_STATUS2);
5083         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5084                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5085                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5086
5087         /* SRBM_STATUS */
5088         tmp = RREG32(mmSRBM_STATUS);
5089         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5090                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5091                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5092
5093         if (grbm_soft_reset || srbm_soft_reset) {
5094                 /* stop the rlc */
5095                 gfx_v8_0_rlc_stop(adev);
5096
5097                 /* Disable GFX parsing/prefetching */
5098                 gfx_v8_0_cp_gfx_enable(adev, false);
5099
5100                 /* Disable MEC parsing/prefetching */
5101                 gfx_v8_0_cp_compute_enable(adev, false);
5102
5103                 if (grbm_soft_reset || srbm_soft_reset) {
5104                         tmp = RREG32(mmGMCON_DEBUG);
5105                         tmp = REG_SET_FIELD(tmp,
5106                                             GMCON_DEBUG, GFX_STALL, 1);
5107                         tmp = REG_SET_FIELD(tmp,
5108                                             GMCON_DEBUG, GFX_CLEAR, 1);
5109                         WREG32(mmGMCON_DEBUG, tmp);
5110
5111                         udelay(50);
5112                 }
5113
5114                 if (grbm_soft_reset) {
5115                         tmp = RREG32(mmGRBM_SOFT_RESET);
5116                         tmp |= grbm_soft_reset;
5117                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5118                         WREG32(mmGRBM_SOFT_RESET, tmp);
5119                         tmp = RREG32(mmGRBM_SOFT_RESET);
5120
5121                         udelay(50);
5122
5123                         tmp &= ~grbm_soft_reset;
5124                         WREG32(mmGRBM_SOFT_RESET, tmp);
5125                         tmp = RREG32(mmGRBM_SOFT_RESET);
5126                 }
5127
5128                 if (srbm_soft_reset) {
5129                         tmp = RREG32(mmSRBM_SOFT_RESET);
5130                         tmp |= srbm_soft_reset;
5131                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5132                         WREG32(mmSRBM_SOFT_RESET, tmp);
5133                         tmp = RREG32(mmSRBM_SOFT_RESET);
5134
5135                         udelay(50);
5136
5137                         tmp &= ~srbm_soft_reset;
5138                         WREG32(mmSRBM_SOFT_RESET, tmp);
5139                         tmp = RREG32(mmSRBM_SOFT_RESET);
5140                 }
5141
5142                 if (grbm_soft_reset || srbm_soft_reset) {
5143                         tmp = RREG32(mmGMCON_DEBUG);
5144                         tmp = REG_SET_FIELD(tmp,
5145                                             GMCON_DEBUG, GFX_STALL, 0);
5146                         tmp = REG_SET_FIELD(tmp,
5147                                             GMCON_DEBUG, GFX_CLEAR, 0);
5148                         WREG32(mmGMCON_DEBUG, tmp);
5149                 }
5150
5151                 /* Wait a little for things to settle down */
5152                 udelay(50);
5153         }
5154         return 0;
5155 }
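/*
 * When either reset mask is non-zero, the sequence above first halts the
 * RLC and both CP engines, brackets the reset with GMCON_DEBUG
 * GFX_STALL/GFX_CLEAR (presumably to quiesce GFX-side memory traffic
 * while the blocks are reset), and pulses each SOFT_RESET register
 * set-then-clear with 50 us settle delays, reading it back after each
 * write.
 */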
5156
5157 /**
5158  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5159  *
5160  * @adev: amdgpu_device pointer
5161  *
5162  * Fetches a snapshot of the GPU clock counter.
5163  * Returns the snapshot as a 64 bit value.
5164  */
5165 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5166 {
5167         uint64_t clock;
5168
5169         mutex_lock(&adev->gfx.gpu_clock_mutex);
5170         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5171         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5172                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5173         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5174         return clock;
5175 }
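/*
 * Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the free-running counter
 * so the two 32-bit LSB/MSB reads pair up into a consistent 64-bit
 * value; gpu_clock_mutex serializes concurrent captures.
 */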
5176
5177 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5178                                           uint32_t vmid,
5179                                           uint32_t gds_base, uint32_t gds_size,
5180                                           uint32_t gws_base, uint32_t gws_size,
5181                                           uint32_t oa_base, uint32_t oa_size)
5182 {
5183         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5184         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5185
5186         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5187         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5188
5189         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5190         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5191
5192         /* GDS Base */
5193         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5194         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5195                                 WRITE_DATA_DST_SEL(0)));
5196         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5197         amdgpu_ring_write(ring, 0);
5198         amdgpu_ring_write(ring, gds_base);
5199
5200         /* GDS Size */
5201         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5202         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5203                                 WRITE_DATA_DST_SEL(0)));
5204         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5205         amdgpu_ring_write(ring, 0);
5206         amdgpu_ring_write(ring, gds_size);
5207
5208         /* GWS */
5209         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5210         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5211                                 WRITE_DATA_DST_SEL(0)));
5212         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5213         amdgpu_ring_write(ring, 0);
5214         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5215
5216         /* OA */
5217         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5218         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5219                                 WRITE_DATA_DST_SEL(0)));
5220         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5221         amdgpu_ring_write(ring, 0);
5222         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5223 }
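/*
 * Each block above is a 5-dword WRITE_DATA packet targeting the per-VMID
 * GDS registers.  The OA value builds a contiguous bitmask of oa_size
 * bits starting at bit oa_base: (1 << (oa_size + oa_base)) - (1 << oa_base),
 * e.g. oa_base = 2, oa_size = 3 yields 0x1c (bits 2..4).
 */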
5224
5225 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5226         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5227         .select_se_sh = &gfx_v8_0_select_se_sh,
5228 };
5229
5230 static int gfx_v8_0_early_init(void *handle)
5231 {
5232         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5233
5234         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5235         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5236         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5237         gfx_v8_0_set_ring_funcs(adev);
5238         gfx_v8_0_set_irq_funcs(adev);
5239         gfx_v8_0_set_gds_init(adev);
5240         gfx_v8_0_set_rlc_funcs(adev);
5241
5242         return 0;
5243 }
5244
5245 static int gfx_v8_0_late_init(void *handle)
5246 {
5247         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5248         int r;
5249
5250         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5251         if (r)
5252                 return r;
5253
5254         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5255         if (r)
5256                 return r;
5257
5258         /* requires IBs so do in late init after IB pool is initialized */
5259         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5260         if (r)
5261                 return r;
5262
5263         amdgpu_set_powergating_state(adev,
5264                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5265
5266         return 0;
5267 }
5268
5269 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5270                                                        bool enable)
5271 {
5272         uint32_t data, temp;
5273
5274         if (adev->asic_type == CHIP_POLARIS11)
5275                 /* Send msg to SMU via Powerplay */
5276                 amdgpu_set_powergating_state(adev,
5277                                              AMD_IP_BLOCK_TYPE_SMC,
5278                                              enable ?
5279                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5280
5281         temp = data = RREG32(mmRLC_PG_CNTL);
5282         /* Enable or disable static MGPG */
5283         if (enable)
5284                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5285         else
5286                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5287
5288         if (temp != data)
5289                 WREG32(mmRLC_PG_CNTL, data);
5290 }
5291
5292 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5293                                                         bool enable)
5294 {
5295         uint32_t data, temp;
5296
5297         temp = data = RREG32(mmRLC_PG_CNTL);
5298         /* Enable or disable dynamic MGPG */
5299         if (enable)
5300                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5301         else
5302                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5303
5304         if (temp != data)
5305                 WREG32(mmRLC_PG_CNTL, data);
5306 }
5307
5308 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5309                 bool enable)
5310 {
5311         uint32_t data, temp;
5312
5313         temp = data = RREG32(mmRLC_PG_CNTL);
5314         /* Enable or disable quick PG */
5315         if (enable)
5316                 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5317         else
5318                 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5319
5320         if (temp != data)
5321                 WREG32(mmRLC_PG_CNTL, data);
5322 }
5323
5324 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5325                                           bool enable)
5326 {
5327         u32 data, orig;
5328
5329         orig = data = RREG32(mmRLC_PG_CNTL);
5330
5331         if (enable)
5332                 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5333         else
5334                 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5335
5336         if (orig != data)
5337                 WREG32(mmRLC_PG_CNTL, data);
5338 }
5339
5340 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5341                                                 bool enable)
5342 {
5343         u32 data, orig;
5344
5345         orig = data = RREG32(mmRLC_PG_CNTL);
5346
5347         if (enable)
5348                 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5349         else
5350                 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5351
5352         if (orig != data)
5353                 WREG32(mmRLC_PG_CNTL, data);
5354
5355         /* Read any GFX register to wake up GFX. */
5356         if (!enable)
5357                 data = RREG32(mmDB_RENDER_CONTROL);
5358 }
5359
5360 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5361                                           bool enable)
5362 {
5363         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5364                 cz_enable_gfx_cg_power_gating(adev, true);
5365                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5366                         cz_enable_gfx_pipeline_power_gating(adev, true);
5367         } else {
5368                 cz_enable_gfx_cg_power_gating(adev, false);
5369                 cz_enable_gfx_pipeline_power_gating(adev, false);
5370         }
5371 }
5372
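/*
 * Per-ASIC dispatch for GFX power gating.  Carrizo/Stoney toggle the
 * CG/pipeline power gating helpers plus static and dynamic per-CU MGPG;
 * Polaris11 additionally has a "quick" MGPG mode.  Each feature is only
 * touched when the matching AMD_PG_SUPPORT_* flag is set, and the whole
 * function is a no-op unless AMD_PG_SUPPORT_GFX_PG is enabled.
 */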
5373 static int gfx_v8_0_set_powergating_state(void *handle,
5374                                           enum amd_powergating_state state)
5375 {
5376         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5377         bool enable = (state == AMD_PG_STATE_GATE);
5378
5379         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5380                 return 0;
5381
5382         switch (adev->asic_type) {
5383         case CHIP_CARRIZO:
5384         case CHIP_STONEY:
5385                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5386                         cz_update_gfx_cg_power_gating(adev, enable);
5387
5388                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5389                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5390                 else
5391                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5392
5393                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5394                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5395                 else
5396                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5397                 break;
5398         case CHIP_POLARIS11:
5399                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5400                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5401                 else
5402                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5403
5404                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5405                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5406                 else
5407                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5408
5409                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5410                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5411                 else
5412                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5413                 break;
5414         default:
5415                 break;
5416         }
5417
5418         return 0;
5419 }
5420
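/*
 * Broadcast a BPM command to the CU and non-CU serdes masters: select all
 * SEs/SHs, open both master masks, then program the command and target
 * register into RLC_SERDES_WR_CTRL.  The Stoney path clears a smaller set
 * of fields (BPM_DATA and REG_ADDR are left alone before being
 * re-programmed below).
 */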
5421 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5422                                      uint32_t reg_addr, uint32_t cmd)
5423 {
5424         uint32_t data;
5425
5426         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5427
5428         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5429         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5430
5431         data = RREG32(mmRLC_SERDES_WR_CTRL);
5432         if (adev->asic_type == CHIP_STONEY)
5433                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5434                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5435                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5436                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5437                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5438                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5439                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5440                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5441                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5442         else
5443                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5444                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5445                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5446                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5447                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5448                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5449                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5450                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5451                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5452                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5453                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5454         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5455                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5456                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5457                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5458
5459         WREG32(mmRLC_SERDES_WR_CTRL, data);
5460 }
5461
5462 #define MSG_ENTER_RLC_SAFE_MODE     1
5463 #define MSG_EXIT_RLC_SAFE_MODE      0
5464
5465 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5466 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5467 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5468
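/*
 * Request RLC safe mode on CZ-class parts through RLC_GPR_REG2: set REQ
 * plus the ENTER message, poll RLC_GPM_STAT until GFX reports both clock
 * and power status, then wait for the RLC to ack by clearing REQ.  Skipped
 * when the RLC is not running or no CG/PG feature that needs it is enabled.
 */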
5469 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5470 {
5471         u32 data = 0;
5472         unsigned i;
5473
5474         data = RREG32(mmRLC_CNTL);
5475         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5476                 return;
5477
5478         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5479             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5480                                AMD_PG_SUPPORT_GFX_DMG))) {
5481                 data |= RLC_GPR_REG2__REQ_MASK;
5482                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5483                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5484                 WREG32(mmRLC_GPR_REG2, data);
5485
5486                 for (i = 0; i < adev->usec_timeout; i++) {
5487                         if ((RREG32(mmRLC_GPM_STAT) &
5488                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5489                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5490                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5491                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5492                                 break;
5493                         udelay(1);
5494                 }
5495
5496                 for (i = 0; i < adev->usec_timeout; i++) {
5497                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5498                                 break;
5499                         udelay(1);
5500                 }
5501                 adev->gfx.rlc.in_safe_mode = true;
5502         }
5503 }
5504
5505 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5506 {
5507         u32 data;
5508         unsigned i;
5509
5510         data = RREG32(mmRLC_CNTL);
5511         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5512                 return;
5513
5514         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5515             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5516                                AMD_PG_SUPPORT_GFX_DMG))) {
5517                 data |= RLC_GPR_REG2__REQ_MASK;
5518                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5519                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5520                 WREG32(mmRLC_GPR_REG2, data);
5521                 adev->gfx.rlc.in_safe_mode = false;
5522         }
5523
5524         for (i = 0; i < adev->usec_timeout; i++) {
5525                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5526                         break;
5527                 udelay(1);
5528         }
5529 }
5530
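/*
 * Topaz/Iceland uses the dedicated RLC_SAFE_MODE register instead of
 * RLC_GPR_REG2: write CMD with MESSAGE=1 to enter, poll the GFX
 * clock/power status, then wait for the RLC to clear CMD.
 */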
5531 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5532 {
5533         u32 data;
5534         unsigned i;
5535
5536         data = RREG32(mmRLC_CNTL);
5537         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5538                 return;
5539
5540         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5541                 data |= RLC_SAFE_MODE__CMD_MASK;
5542                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5543                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5544                 WREG32(mmRLC_SAFE_MODE, data);
5545
5546                 for (i = 0; i < adev->usec_timeout; i++) {
5547                         if ((RREG32(mmRLC_GPM_STAT) &
5548                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5549                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5550                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5551                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5552                                 break;
5553                         udelay(1);
5554                 }
5555
5556                 for (i = 0; i < adev->usec_timeout; i++) {
5557                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5558                                 break;
5559                         udelay(1);
5560                 }
5561                 adev->gfx.rlc.in_safe_mode = true;
5562         }
5563 }
5564
5565 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5566 {
5567         u32 data = 0;
5568         unsigned i;
5569
5570         data = RREG32(mmRLC_CNTL);
5571         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5572                 return;
5573
5574         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5575                 if (adev->gfx.rlc.in_safe_mode) {
5576                         data |= RLC_SAFE_MODE__CMD_MASK;
5577                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5578                         WREG32(mmRLC_SAFE_MODE, data);
5579                         adev->gfx.rlc.in_safe_mode = false;
5580                 }
5581         }
5582
5583         for (i = 0; i < adev->usec_timeout; i++) {
5584                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5585                         break;
5586                 udelay(1);
5587         }
5588 }
5589
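/*
 * No-op safe mode hooks for ASICs that do not use the RLC handshake here
 * (everything other than Topaz, Carrizo and Stoney); they only keep the
 * in_safe_mode bookkeeping consistent for the callers.
 */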
5590 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5591 {
5592         adev->gfx.rlc.in_safe_mode = true;
5593 }
5594
5595 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5596 {
5597         adev->gfx.rlc.in_safe_mode = false;
5598 }
5599
5600 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5601         .enter_safe_mode = cz_enter_rlc_safe_mode,
5602         .exit_safe_mode = cz_exit_rlc_safe_mode
5603 };
5604
5605 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5606         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5607         .exit_safe_mode = iceland_exit_rlc_safe_mode
5608 };
5609
5610 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5611         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5612         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5613 };
5614
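/*
 * MGCG/MGLS update, run under RLC safe mode.  Enable path: turn on RLC/CP
 * memory light sleep where supported, clear the MGCG override bits (the
 * GRBM override is left untouched on APUs), sync the serdes masters and
 * clear the BPM MGCG override, then optionally program CGTS.  Disable path
 * runs the same steps in reverse, setting the overrides back.
 */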
5615 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5616                                                       bool enable)
5617 {
5618         uint32_t temp, data;
5619
5620         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5621
5622         /* It is disabled by HW by default */
5623         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5624                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5625                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5626                                 /* 1 - RLC memory Light sleep */
5627                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5628                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5629                                 if (temp != data)
5630                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5631                         }
5632
5633                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5634                                 /* 2 - CP memory Light sleep */
5635                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5636                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5637                                 if (temp != data)
5638                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5639                         }
5640                 }
5641
5642                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5643                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5644                 if (adev->flags & AMD_IS_APU)
5645                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5646                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5647                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5648                 else
5649                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5650                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5651                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5652                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5653
5654                 if (temp != data)
5655                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5656
5657                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5658                 gfx_v8_0_wait_for_rlc_serdes(adev);
5659
5660                 /* 5 - clear mgcg override */
5661                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5662
5663                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5664                         /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5665                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5666                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5667                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5668                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5669                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5670                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5671                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5672                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5673                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5674                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5675                         if (temp != data)
5676                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5677                 }
5678                 udelay(50);
5679
5680                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5681                 gfx_v8_0_wait_for_rlc_serdes(adev);
5682         } else {
5683                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5684                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5685                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5686                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5687                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5688                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5689                 if (temp != data)
5690                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5691
5692                 /* 2 - disable MGLS in RLC */
5693                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5694                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5695                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5696                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5697                 }
5698
5699                 /* 3 - disable MGLS in CP */
5700                 data = RREG32(mmCP_MEM_SLP_CNTL);
5701                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5702                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5703                         WREG32(mmCP_MEM_SLP_CNTL, data);
5704                 }
5705
5706                 /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
5707                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5708                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5709                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5710                 if (temp != data)
5711                         WREG32(mmCGTS_SM_CTRL_REG, data);
5712
5713                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5714                 gfx_v8_0_wait_for_rlc_serdes(adev);
5715
5716                 /* 6 - set mgcg override */
5717                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5718
5719                 udelay(50);
5720
5721                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5722                 gfx_v8_0_wait_for_rlc_serdes(adev);
5723         }
5724
5725         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5726 }
5727
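/*
 * CGCG/CGLS counterpart of the MGCG routine, also run under RLC safe mode.
 * On enable: clear the CGCG (and optionally CGLS) override bits, push the
 * corresponding BPM serdes commands and set the enable bits in
 * RLC_CGCG_CGLS_CTRL.  On disable: set the overrides back, wake the GFX
 * block with dummy register reads and clear the enable bits.
 */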
5728 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5729                                                       bool enable)
5730 {
5731         uint32_t temp, temp1, data, data1;
5732
5733         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5734
5735         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5736
5737         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5738                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5739                  * Cmp_busy/GFX_Idle interrupts
5740                  */
5741                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5742
5743                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5744                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5745                 if (temp1 != data1)
5746                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5747
5748                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5749                 gfx_v8_0_wait_for_rlc_serdes(adev);
5750
5751                 /* 3 - clear cgcg override */
5752                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5753
5754                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5755                 gfx_v8_0_wait_for_rlc_serdes(adev);
5756
5757                 /* 4 - write cmd to set CGLS */
5758                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5759
5760                 /* 5 - enable cgcg */
5761                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5762
5763                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5764                         /* enable cgls*/
5765                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5766
5767                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5768                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5769
5770                         if (temp1 != data1)
5771                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5772                 } else {
5773                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5774                 }
5775
5776                 if (temp != data)
5777                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5778         } else {
5779                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5780                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5781
5782                 /* TEST CGCG */
5783                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5784                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5785                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5786                 if (temp1 != data1)
5787                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5788
5789                 /* read gfx register to wake up cgcg */
5790                 RREG32(mmCB_CGTT_SCLK_CTRL);
5791                 RREG32(mmCB_CGTT_SCLK_CTRL);
5792                 RREG32(mmCB_CGTT_SCLK_CTRL);
5793                 RREG32(mmCB_CGTT_SCLK_CTRL);
5794
5795                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5796                 gfx_v8_0_wait_for_rlc_serdes(adev);
5797
5798                 /* write cmd to set CGCG override */
5799                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5800
5801                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5802                 gfx_v8_0_wait_for_rlc_serdes(adev);
5803
5804                 /* write cmd to Clear CGLS */
5805                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5806
5807                 /* disable cgcg, cgls should be disabled too. */
5808                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5809                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5810                 if (temp != data)
5811                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5812         }
5813
5814         gfx_v8_0_wait_for_rlc_serdes(adev);
5815
5816         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5817 }
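
/*
 * Ordering note: coarse grain gating is only enabled after (and disabled
 * before) the medium grain features, which is why the two helper calls
 * are swapped between the enable and disable branches below.
 */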
5818 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5819                                             bool enable)
5820 {
5821         if (enable) {
5822                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5823                  * ===  MGCG + MGLS + TS(CG/LS) ===
5824                  */
5825                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5826                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5827         } else {
5828                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5829                  * ===  CGCG + CGLS ===
5830                  */
5831                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5832                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5833         }
5834         return 0;
5835 }
5836
5837 static int gfx_v8_0_set_clockgating_state(void *handle,
5838                                           enum amd_clockgating_state state)
5839 {
5840         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5841
5842         switch (adev->asic_type) {
5843         case CHIP_FIJI:
5844         case CHIP_CARRIZO:
5845         case CHIP_STONEY:
5846                 gfx_v8_0_update_gfx_clock_gating(adev,
5847                                                  state == AMD_CG_STATE_GATE);
5848                 break;
5849         default:
5850                 break;
5851         }
5852         return 0;
5853 }
5854
5855 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5856 {
5857         u32 rptr;
5858
5859         rptr = ring->adev->wb.wb[ring->rptr_offs];
5860
5861         return rptr;
5862 }
5863
5864 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5865 {
5866         struct amdgpu_device *adev = ring->adev;
5867         u32 wptr;
5868
5869         if (ring->use_doorbell)
5870                 /* XXX check if swapping is necessary on BE */
5871                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5872         else
5873                 wptr = RREG32(mmCP_RB0_WPTR);
5874
5875         return wptr;
5876 }
5877
5878 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5879 {
5880         struct amdgpu_device *adev = ring->adev;
5881
5882         if (ring->use_doorbell) {
5883                 /* XXX check if swapping is necessary on BE */
5884                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5885                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5886         } else {
5887                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5888                 (void)RREG32(mmCP_RB0_WPTR);
5889         }
5890 }
5891
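/*
 * Emit an HDP flush: a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ
 * and polls GPU_HDP_FLUSH_DONE until the per-client bit reads back
 * (CP0 for the gfx ring, CP2/CP6 shifted by pipe for MEC1/MEC2).
 */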
5892 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5893 {
5894         u32 ref_and_mask, reg_mem_engine;
5895
5896         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5897                 switch (ring->me) {
5898                 case 1:
5899                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5900                         break;
5901                 case 2:
5902                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5903                         break;
5904                 default:
5905                         return;
5906                 }
5907                 reg_mem_engine = 0;
5908         } else {
5909                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5910                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5911         }
5912
5913         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5914         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5915                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5916                                  reg_mem_engine));
5917         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5918         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5919         amdgpu_ring_write(ring, ref_and_mask);
5920         amdgpu_ring_write(ring, ref_and_mask);
5921         amdgpu_ring_write(ring, 0x20); /* poll interval */
5922 }
5923
5924 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5925 {
5926         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5927         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5928                                  WRITE_DATA_DST_SEL(0) |
5929                                  WR_CONFIRM));
5930         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5931         amdgpu_ring_write(ring, 0);
5932         amdgpu_ring_write(ring, 1);
5933
5934 }
5935
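/*
 * Emit an indirect buffer on the gfx ring.  A SWITCH_BUFFER precedes the
 * first IB of a ring frame, constant-engine IBs use INDIRECT_BUFFER_CONST,
 * and the VM id is packed into bits 24+ of the control dword.
 */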
5936 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5937                                       struct amdgpu_ib *ib,
5938                                       unsigned vm_id, bool ctx_switch)
5939 {
5940         u32 header, control = 0;
5941
5942         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5943         if (ctx_switch) {
5944                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5945                 amdgpu_ring_write(ring, 0);
5946         }
5947
5948         if (ib->flags & AMDGPU_IB_FLAG_CE)
5949                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5950         else
5951                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5952
5953         control |= ib->length_dw | (vm_id << 24);
5954
5955         amdgpu_ring_write(ring, header);
5956         amdgpu_ring_write(ring,
5957 #ifdef __BIG_ENDIAN
5958                           (2 << 0) |
5959 #endif
5960                           (ib->gpu_addr & 0xFFFFFFFC));
5961         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5962         amdgpu_ring_write(ring, control);
5963 }
5964
5965 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5966                                           struct amdgpu_ib *ib,
5967                                           unsigned vm_id, bool ctx_switch)
5968 {
5969         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
5970
5971         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5972         amdgpu_ring_write(ring,
5973 #ifdef __BIG_ENDIAN
5974                                           (2 << 0) |
5975 #endif
5976                                           (ib->gpu_addr & 0xFFFFFFFC));
5977         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5978         amdgpu_ring_write(ring, control);
5979 }
5980
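/*
 * GFX fence: EVENT_WRITE_EOP flushes the TC/TCL1 caches, writes the
 * sequence number (32- or 64-bit depending on the fence flags) and
 * optionally raises an interrupt.
 */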
5981 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5982                                          u64 seq, unsigned flags)
5983 {
5984         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5985         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5986
5987         /* EVENT_WRITE_EOP - flush caches, send int */
5988         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5989         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5990                                  EOP_TC_ACTION_EN |
5991                                  EOP_TC_WB_ACTION_EN |
5992                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5993                                  EVENT_INDEX(5)));
5994         amdgpu_ring_write(ring, addr & 0xfffffffc);
5995         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5996                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5997         amdgpu_ring_write(ring, lower_32_bits(seq));
5998         amdgpu_ring_write(ring, upper_32_bits(seq));
5999
6000 }
6001
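/*
 * Pipeline sync: wait (via PFP on gfx, ME on compute) until the ring's
 * fence memory equals the last synced sequence number, then emit two
 * SWITCH_BUFFERs on gfx so the CE does not run ahead of the ME.
 */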
6002 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6003 {
6004         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6005         uint32_t seq = ring->fence_drv.sync_seq;
6006         uint64_t addr = ring->fence_drv.gpu_addr;
6007
6008         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6009         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6010                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6011                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6012         amdgpu_ring_write(ring, addr & 0xfffffffc);
6013         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6014         amdgpu_ring_write(ring, seq);
6015         amdgpu_ring_write(ring, 0xffffffff);
6016         amdgpu_ring_write(ring, 4); /* poll interval */
6017
6018         if (usepfp) {
6019                 /* sync CE with ME to prevent CE from fetching the CEIB before the context switch completes */
6020                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6021                 amdgpu_ring_write(ring, 0);
6022                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6023                 amdgpu_ring_write(ring, 0);
6024         }
6025 }
6026
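/*
 * VM flush: write the new page directory address into the per-VMID page
 * table base register (contexts 0-7 and 8-15 live in different register
 * blocks), kick VM_INVALIDATE_REQUEST for that VMID, emit a WAIT_REG_MEM
 * on VM_INVALIDATE_REQUEST as a completion barrier, and on gfx rings
 * resynchronize PFP with ME afterwards.
 */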
6027 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6028                                         unsigned vm_id, uint64_t pd_addr)
6029 {
6030         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6031
6032         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6033         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6034                                  WRITE_DATA_DST_SEL(0) |
6035                                  WR_CONFIRM));
6036         if (vm_id < 8) {
6037                 amdgpu_ring_write(ring,
6038                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6039         } else {
6040                 amdgpu_ring_write(ring,
6041                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6042         }
6043         amdgpu_ring_write(ring, 0);
6044         amdgpu_ring_write(ring, pd_addr >> 12);
6045
6046         /* bits 0-15 are the VM contexts0-15 */
6047         /* invalidate the cache */
6048         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6049         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6050                                  WRITE_DATA_DST_SEL(0)));
6051         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6052         amdgpu_ring_write(ring, 0);
6053         amdgpu_ring_write(ring, 1 << vm_id);
6054
6055         /* wait for the invalidate to complete */
6056         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6057         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6058                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6059                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6060         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6061         amdgpu_ring_write(ring, 0);
6062         amdgpu_ring_write(ring, 0); /* ref */
6063         amdgpu_ring_write(ring, 0); /* mask */
6064         amdgpu_ring_write(ring, 0x20); /* poll interval */
6065
6066         /* compute doesn't have PFP */
6067         if (usepfp) {
6068                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6069                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6070                 amdgpu_ring_write(ring, 0x0);
6071                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6072                 amdgpu_ring_write(ring, 0);
6073                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6074                 amdgpu_ring_write(ring, 0);
6075         }
6076 }
6077
6078 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6079 {
6080         return ring->adev->wb.wb[ring->rptr_offs];
6081 }
6082
6083 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6084 {
6085         return ring->adev->wb.wb[ring->wptr_offs];
6086 }
6087
6088 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6089 {
6090         struct amdgpu_device *adev = ring->adev;
6091
6092         /* XXX check if swapping is necessary on BE */
6093         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6094         WDOORBELL32(ring->doorbell_index, ring->wptr);
6095 }
6096
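/*
 * Compute fence: same cache-flush-and-interrupt semantics as the gfx
 * fence, but emitted with a RELEASE_MEM packet instead of EVENT_WRITE_EOP.
 */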
6097 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6098                                              u64 addr, u64 seq,
6099                                              unsigned flags)
6100 {
6101         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6102         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6103
6104         /* RELEASE_MEM - flush caches, send int */
6105         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6106         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6107                                  EOP_TC_ACTION_EN |
6108                                  EOP_TC_WB_ACTION_EN |
6109                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6110                                  EVENT_INDEX(5)));
6111         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6112         amdgpu_ring_write(ring, addr & 0xfffffffc);
6113         amdgpu_ring_write(ring, upper_32_bits(addr));
6114         amdgpu_ring_write(ring, lower_32_bits(seq));
6115         amdgpu_ring_write(ring, upper_32_bits(seq));
6116 }
6117
6118 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6119                                                  enum amdgpu_interrupt_state state)
6120 {
6121         u32 cp_int_cntl;
6122
6123         switch (state) {
6124         case AMDGPU_IRQ_STATE_DISABLE:
6125                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6126                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6127                                             TIME_STAMP_INT_ENABLE, 0);
6128                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6129                 break;
6130         case AMDGPU_IRQ_STATE_ENABLE:
6131                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6132                 cp_int_cntl =
6133                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6134                                       TIME_STAMP_INT_ENABLE, 1);
6135                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6136                 break;
6137         default:
6138                 break;
6139         }
6140 }
6141
6142 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6143                                                      int me, int pipe,
6144                                                      enum amdgpu_interrupt_state state)
6145 {
6146         u32 mec_int_cntl, mec_int_cntl_reg;
6147
6148         /*
6149          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6150          * handles the setting of interrupts for this specific pipe. All other
6151          * pipes' interrupts are set by amdkfd.
6152          */
6153
6154         if (me == 1) {
6155                 switch (pipe) {
6156                 case 0:
6157                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6158                         break;
6159                 default:
6160                         DRM_DEBUG("invalid pipe %d\n", pipe);
6161                         return;
6162                 }
6163         } else {
6164                 DRM_DEBUG("invalid me %d\n", me);
6165                 return;
6166         }
6167
6168         switch (state) {
6169         case AMDGPU_IRQ_STATE_DISABLE:
6170                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6171                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6172                                              TIME_STAMP_INT_ENABLE, 0);
6173                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6174                 break;
6175         case AMDGPU_IRQ_STATE_ENABLE:
6176                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6177                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6178                                              TIME_STAMP_INT_ENABLE, 1);
6179                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6180                 break;
6181         default:
6182                 break;
6183         }
6184 }
6185
6186 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6187                                              struct amdgpu_irq_src *source,
6188                                              unsigned type,
6189                                              enum amdgpu_interrupt_state state)
6190 {
6191         u32 cp_int_cntl;
6192
6193         switch (state) {
6194         case AMDGPU_IRQ_STATE_DISABLE:
6195                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6196                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6197                                             PRIV_REG_INT_ENABLE, 0);
6198                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6199                 break;
6200         case AMDGPU_IRQ_STATE_ENABLE:
6201                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6202                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6203                                             PRIV_REG_INT_ENABLE, 1);
6204                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6205                 break;
6206         default:
6207                 break;
6208         }
6209
6210         return 0;
6211 }
6212
6213 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6214                                               struct amdgpu_irq_src *source,
6215                                               unsigned type,
6216                                               enum amdgpu_interrupt_state state)
6217 {
6218         u32 cp_int_cntl;
6219
6220         switch (state) {
6221         case AMDGPU_IRQ_STATE_DISABLE:
6222                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6223                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6224                                             PRIV_INSTR_INT_ENABLE, 0);
6225                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6226                 break;
6227         case AMDGPU_IRQ_STATE_ENABLE:
6228                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6229                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6230                                             PRIV_INSTR_INT_ENABLE, 1);
6231                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6232                 break;
6233         default:
6234                 break;
6235         }
6236
6237         return 0;
6238 }
6239
6240 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6241                                             struct amdgpu_irq_src *src,
6242                                             unsigned type,
6243                                             enum amdgpu_interrupt_state state)
6244 {
6245         switch (type) {
6246         case AMDGPU_CP_IRQ_GFX_EOP:
6247                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6248                 break;
6249         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6250                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6251                 break;
6252         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6253                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6254                 break;
6255         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6256                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6257                 break;
6258         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6259                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6260                 break;
6261         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6262                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6263                 break;
6264         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6265                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6266                 break;
6267         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6268                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6269                 break;
6270         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6271                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6272                 break;
6273         default:
6274                 break;
6275         }
6276         return 0;
6277 }
6278
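/*
 * EOP interrupt handler: decode me/pipe/queue from the IV ring_id and run
 * fence processing on the matching ring (the single gfx ring for ME 0,
 * the corresponding compute ring for MEC1/MEC2).
 */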
6279 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6280                             struct amdgpu_irq_src *source,
6281                             struct amdgpu_iv_entry *entry)
6282 {
6283         int i;
6284         u8 me_id, pipe_id, queue_id;
6285         struct amdgpu_ring *ring;
6286
6287         DRM_DEBUG("IH: CP EOP\n");
6288         me_id = (entry->ring_id & 0x0c) >> 2;
6289         pipe_id = (entry->ring_id & 0x03) >> 0;
6290         queue_id = (entry->ring_id & 0x70) >> 4;
6291
6292         switch (me_id) {
6293         case 0:
6294                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6295                 break;
6296         case 1:
6297         case 2:
6298                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6299                         ring = &adev->gfx.compute_ring[i];
6300                         /* Per-queue interrupt is supported for MEC starting from VI.
6301                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
6302                          */
6303                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6304                                 amdgpu_fence_process(ring);
6305                 }
6306                 break;
6307         }
6308         return 0;
6309 }
6310
6311 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6312                                  struct amdgpu_irq_src *source,
6313                                  struct amdgpu_iv_entry *entry)
6314 {
6315         DRM_ERROR("Illegal register access in command stream\n");
6316         schedule_work(&adev->reset_work);
6317         return 0;
6318 }
6319
6320 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6321                                   struct amdgpu_irq_src *source,
6322                                   struct amdgpu_iv_entry *entry)
6323 {
6324         DRM_ERROR("Illegal instruction in command stream\n");
6325         schedule_work(&adev->reset_work);
6326         return 0;
6327 }
6328
6329 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6330         .name = "gfx_v8_0",
6331         .early_init = gfx_v8_0_early_init,
6332         .late_init = gfx_v8_0_late_init,
6333         .sw_init = gfx_v8_0_sw_init,
6334         .sw_fini = gfx_v8_0_sw_fini,
6335         .hw_init = gfx_v8_0_hw_init,
6336         .hw_fini = gfx_v8_0_hw_fini,
6337         .suspend = gfx_v8_0_suspend,
6338         .resume = gfx_v8_0_resume,
6339         .is_idle = gfx_v8_0_is_idle,
6340         .wait_for_idle = gfx_v8_0_wait_for_idle,
6341         .soft_reset = gfx_v8_0_soft_reset,
6342         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6343         .set_powergating_state = gfx_v8_0_set_powergating_state,
6344 };
6345
6346 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6347         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6348         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6349         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6350         .parse_cs = NULL,
6351         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6352         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6353         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6354         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6355         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6356         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6357         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6358         .test_ring = gfx_v8_0_ring_test_ring,
6359         .test_ib = gfx_v8_0_ring_test_ib,
6360         .insert_nop = amdgpu_ring_insert_nop,
6361         .pad_ib = amdgpu_ring_generic_pad_ib,
6362 };
6363
6364 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6365         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6366         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6367         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6368         .parse_cs = NULL,
6369         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6370         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6371         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6372         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6373         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6374         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6375         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6376         .test_ring = gfx_v8_0_ring_test_ring,
6377         .test_ib = gfx_v8_0_ring_test_ib,
6378         .insert_nop = amdgpu_ring_insert_nop,
6379         .pad_ib = amdgpu_ring_generic_pad_ib,
6380 };
6381
6382 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6383 {
6384         int i;
6385
6386         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6387                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6388
6389         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6390                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6391 }
6392
6393 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6394         .set = gfx_v8_0_set_eop_interrupt_state,
6395         .process = gfx_v8_0_eop_irq,
6396 };
6397
6398 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6399         .set = gfx_v8_0_set_priv_reg_fault_state,
6400         .process = gfx_v8_0_priv_reg_irq,
6401 };
6402
6403 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6404         .set = gfx_v8_0_set_priv_inst_fault_state,
6405         .process = gfx_v8_0_priv_inst_irq,
6406 };
6407
6408 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6409 {
6410         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6411         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6412
6413         adev->gfx.priv_reg_irq.num_types = 1;
6414         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6415
6416         adev->gfx.priv_inst_irq.num_types = 1;
6417         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6418 }
6419
6420 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6421 {
6422         switch (adev->asic_type) {
6423         case CHIP_TOPAZ:
6424                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6425                 break;
6426         case CHIP_STONEY:
6427         case CHIP_CARRIZO:
6428                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6429                 break;
6430         default:
6431                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6432                 break;
6433         }
6434 }
6435
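/*
 * Partition GDS memory, GWS and OA between the gfx and compute (CS)
 * clients based on the total size reported by GDS_VMID0_SIZE; the 64KB
 * case hands out 4KB memory slices and smaller GWS/OA partitions.
 */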
6436 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6437 {
6438         /* init asic gds info */
6439         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6440         adev->gds.gws.total_size = 64;
6441         adev->gds.oa.total_size = 16;
6442
6443         if (adev->gds.mem.total_size == 64 * 1024) {
6444                 adev->gds.mem.gfx_partition_size = 4096;
6445                 adev->gds.mem.cs_partition_size = 4096;
6446
6447                 adev->gds.gws.gfx_partition_size = 4;
6448                 adev->gds.gws.cs_partition_size = 4;
6449
6450                 adev->gds.oa.gfx_partition_size = 4;
6451                 adev->gds.oa.cs_partition_size = 1;
6452         } else {
6453                 adev->gds.mem.gfx_partition_size = 1024;
6454                 adev->gds.mem.cs_partition_size = 1024;
6455
6456                 adev->gds.gws.gfx_partition_size = 16;
6457                 adev->gds.gws.cs_partition_size = 16;
6458
6459                 adev->gds.oa.gfx_partition_size = 4;
6460                 adev->gds.oa.cs_partition_size = 4;
6461         }
6462 }
6463
6464 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6465                                                  u32 bitmap)
6466 {
6467         u32 data;
6468
6469         if (!bitmap)
6470                 return;
6471
6472         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6473         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6474
6475         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6476 }
6477
6478 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6479 {
6480         u32 data, mask;
6481
6482         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6483         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6484
6485         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6486         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6487
6488         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6489
6490         return (~data) & mask;
6491 }
6492
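/*
 * Build the CU info table: walk every SE/SH, apply any requested CU
 * disable masks, record the active-CU bitmap per SH, and mark up to two
 * CUs per SH as "always on" in ao_cu_mask.
 */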
6493 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6494 {
6495         int i, j, k, counter, active_cu_number = 0;
6496         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6497         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6498         unsigned disable_masks[4 * 2];
6499
6500         memset(cu_info, 0, sizeof(*cu_info));
6501
6502         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6503
6504         mutex_lock(&adev->grbm_idx_mutex);
6505         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6506                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6507                         mask = 1;
6508                         ao_bitmap = 0;
6509                         counter = 0;
6510                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6511                         if (i < 4 && j < 2)
6512                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6513                                         adev, disable_masks[i * 2 + j]);
6514                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6515                         cu_info->bitmap[i][j] = bitmap;
6516
6517                         for (k = 0; k < 16; k++) {
6518                                 if (bitmap & mask) {
6519                                         if (counter < 2)
6520                                                 ao_bitmap |= mask;
6521                                         counter++;
6522                                 }
6523                                 mask <<= 1;
6524                         }
6525                         active_cu_number += counter;
6526                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6527                 }
6528         }
6529         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6530         mutex_unlock(&adev->grbm_idx_mutex);
6531
6532         cu_info->number = active_cu_number;
6533         cu_info->ao_cu_mask = ao_cu_mask;
6534 }