drivers/gpu/drm/radeon/cik.c
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 #include "radeon_kfd.h"
36
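/*
 * Note: each ASIC lists two firmware naming schemes below.  The upper-case
 * names are the legacy images, the lower-case names the newer packaged
 * firmware; the newer names are requested first, with the legacy ones used
 * as a fallback (see cik_init_microcode()).
 */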
37 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
46
47 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
48 MODULE_FIRMWARE("radeon/bonaire_me.bin");
49 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
50 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
52 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
54 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
55
56 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
57 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
65
66 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
67 MODULE_FIRMWARE("radeon/hawaii_me.bin");
68 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
69 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
71 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
73 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
74
75 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
76 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
81
82 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
83 MODULE_FIRMWARE("radeon/kaveri_me.bin");
84 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
85 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
87 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
88 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
89
90 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
91 MODULE_FIRMWARE("radeon/KABINI_me.bin");
92 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
93 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
94 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
95 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
96
97 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
98 MODULE_FIRMWARE("radeon/kabini_me.bin");
99 MODULE_FIRMWARE("radeon/kabini_ce.bin");
100 MODULE_FIRMWARE("radeon/kabini_mec.bin");
101 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
102 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
103
104 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
105 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
110
111 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
112 MODULE_FIRMWARE("radeon/mullins_me.bin");
113 MODULE_FIRMWARE("radeon/mullins_ce.bin");
114 MODULE_FIRMWARE("radeon/mullins_mec.bin");
115 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
116 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
117
118 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
119 extern void r600_ih_ring_fini(struct radeon_device *rdev);
120 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
121 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
123 extern void sumo_rlc_fini(struct radeon_device *rdev);
124 extern int sumo_rlc_init(struct radeon_device *rdev);
125 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
126 extern void si_rlc_reset(struct radeon_device *rdev);
127 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
128 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 extern int cik_sdma_resume(struct radeon_device *rdev);
130 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
131 extern void cik_sdma_fini(struct radeon_device *rdev);
132 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
133 static void cik_rlc_stop(struct radeon_device *rdev);
134 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135 static void cik_program_aspm(struct radeon_device *rdev);
136 static void cik_init_pg(struct radeon_device *rdev);
137 static void cik_init_cg(struct radeon_device *rdev);
138 static void cik_fini_pg(struct radeon_device *rdev);
139 static void cik_fini_cg(struct radeon_device *rdev);
140 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141                                           bool enable);
142
143 /* get temperature in millidegrees */
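/*
 * The CTF temperature field read below is a 9-bit value in degrees C; when
 * bit 0x200 is set the reading is clamped to 255 C.  The result is scaled to
 * millidegrees, e.g. a raw field of 0x3c (60 C) is returned as 60000.
 */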
144 int ci_get_temp(struct radeon_device *rdev)
145 {
146         u32 temp;
147         int actual_temp = 0;
148
149         temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
150                 CTF_TEMP_SHIFT;
151
152         if (temp & 0x200)
153                 actual_temp = 255;
154         else
155                 actual_temp = temp & 0x1ff;
156
157         actual_temp = actual_temp * 1000;
158
159         return actual_temp;
160 }
161
162 /* get temperature in millidegrees */
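/*
 * The APU SMC exposes the current temperature through register 0xC0300E0C.
 * The raw value is converted with (raw / 8) - 49 degrees C and then scaled
 * to millidegrees, e.g. a raw reading of 584 yields (584 / 8) - 49 = 24 C,
 * returned as 24000.
 */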
163 int kv_get_temp(struct radeon_device *rdev)
164 {
165         u32 temp;
166         int actual_temp = 0;
167
168         temp = RREG32_SMC(0xC0300E0C);
169
170         if (temp)
171                 actual_temp = (temp / 8) - 49;
172         else
173                 actual_temp = 0;
174
175         actual_temp = actual_temp * 1000;
176
177         return actual_temp;
178 }
179
180 /*
181  * Indirect registers accessor
182  */
183 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
184 {
185         unsigned long flags;
186         u32 r;
187
188         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
189         WREG32(PCIE_INDEX, reg);
190         (void)RREG32(PCIE_INDEX);
191         r = RREG32(PCIE_DATA);
192         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
193         return r;
194 }
195
196 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
197 {
198         unsigned long flags;
199
200         spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
201         WREG32(PCIE_INDEX, reg);
202         (void)RREG32(PCIE_INDEX);
203         WREG32(PCIE_DATA, v);
204         (void)RREG32(PCIE_DATA);
205         spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
206 }
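/*
 * Usage sketch (the 0x1234 offset is made up for illustration): PCIe port
 * registers are reached through the indexed PCIE_INDEX/PCIE_DATA pair, so a
 * read-modify-write goes through the accessors above, which take
 * pciep_idx_lock so the index/data sequence stays atomic:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, 0x1234);
 *	tmp |= (1 << 0);
 *	cik_pciep_wreg(rdev, 0x1234, tmp);
 */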
207
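/*
 * RLC save/restore register lists.  Each register entry below appears to be
 * encoded as (GRBM_GFX_INDEX-style selector << 16) | (register byte offset
 * >> 2), followed by a placeholder value; the bare counts (0x3, 0x5) that
 * appear mid-list seem to delimit sub-sections.  The tables are handed to
 * the RLC save/restore machinery (see sumo_rlc_init()) rather than being
 * written directly.
 */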
208 static const u32 spectre_rlc_save_restore_register_list[] =
209 {
210         (0x0e00 << 16) | (0xc12c >> 2),
211         0x00000000,
212         (0x0e00 << 16) | (0xc140 >> 2),
213         0x00000000,
214         (0x0e00 << 16) | (0xc150 >> 2),
215         0x00000000,
216         (0x0e00 << 16) | (0xc15c >> 2),
217         0x00000000,
218         (0x0e00 << 16) | (0xc168 >> 2),
219         0x00000000,
220         (0x0e00 << 16) | (0xc170 >> 2),
221         0x00000000,
222         (0x0e00 << 16) | (0xc178 >> 2),
223         0x00000000,
224         (0x0e00 << 16) | (0xc204 >> 2),
225         0x00000000,
226         (0x0e00 << 16) | (0xc2b4 >> 2),
227         0x00000000,
228         (0x0e00 << 16) | (0xc2b8 >> 2),
229         0x00000000,
230         (0x0e00 << 16) | (0xc2bc >> 2),
231         0x00000000,
232         (0x0e00 << 16) | (0xc2c0 >> 2),
233         0x00000000,
234         (0x0e00 << 16) | (0x8228 >> 2),
235         0x00000000,
236         (0x0e00 << 16) | (0x829c >> 2),
237         0x00000000,
238         (0x0e00 << 16) | (0x869c >> 2),
239         0x00000000,
240         (0x0600 << 16) | (0x98f4 >> 2),
241         0x00000000,
242         (0x0e00 << 16) | (0x98f8 >> 2),
243         0x00000000,
244         (0x0e00 << 16) | (0x9900 >> 2),
245         0x00000000,
246         (0x0e00 << 16) | (0xc260 >> 2),
247         0x00000000,
248         (0x0e00 << 16) | (0x90e8 >> 2),
249         0x00000000,
250         (0x0e00 << 16) | (0x3c000 >> 2),
251         0x00000000,
252         (0x0e00 << 16) | (0x3c00c >> 2),
253         0x00000000,
254         (0x0e00 << 16) | (0x8c1c >> 2),
255         0x00000000,
256         (0x0e00 << 16) | (0x9700 >> 2),
257         0x00000000,
258         (0x0e00 << 16) | (0xcd20 >> 2),
259         0x00000000,
260         (0x4e00 << 16) | (0xcd20 >> 2),
261         0x00000000,
262         (0x5e00 << 16) | (0xcd20 >> 2),
263         0x00000000,
264         (0x6e00 << 16) | (0xcd20 >> 2),
265         0x00000000,
266         (0x7e00 << 16) | (0xcd20 >> 2),
267         0x00000000,
268         (0x8e00 << 16) | (0xcd20 >> 2),
269         0x00000000,
270         (0x9e00 << 16) | (0xcd20 >> 2),
271         0x00000000,
272         (0xae00 << 16) | (0xcd20 >> 2),
273         0x00000000,
274         (0xbe00 << 16) | (0xcd20 >> 2),
275         0x00000000,
276         (0x0e00 << 16) | (0x89bc >> 2),
277         0x00000000,
278         (0x0e00 << 16) | (0x8900 >> 2),
279         0x00000000,
280         0x3,
281         (0x0e00 << 16) | (0xc130 >> 2),
282         0x00000000,
283         (0x0e00 << 16) | (0xc134 >> 2),
284         0x00000000,
285         (0x0e00 << 16) | (0xc1fc >> 2),
286         0x00000000,
287         (0x0e00 << 16) | (0xc208 >> 2),
288         0x00000000,
289         (0x0e00 << 16) | (0xc264 >> 2),
290         0x00000000,
291         (0x0e00 << 16) | (0xc268 >> 2),
292         0x00000000,
293         (0x0e00 << 16) | (0xc26c >> 2),
294         0x00000000,
295         (0x0e00 << 16) | (0xc270 >> 2),
296         0x00000000,
297         (0x0e00 << 16) | (0xc274 >> 2),
298         0x00000000,
299         (0x0e00 << 16) | (0xc278 >> 2),
300         0x00000000,
301         (0x0e00 << 16) | (0xc27c >> 2),
302         0x00000000,
303         (0x0e00 << 16) | (0xc280 >> 2),
304         0x00000000,
305         (0x0e00 << 16) | (0xc284 >> 2),
306         0x00000000,
307         (0x0e00 << 16) | (0xc288 >> 2),
308         0x00000000,
309         (0x0e00 << 16) | (0xc28c >> 2),
310         0x00000000,
311         (0x0e00 << 16) | (0xc290 >> 2),
312         0x00000000,
313         (0x0e00 << 16) | (0xc294 >> 2),
314         0x00000000,
315         (0x0e00 << 16) | (0xc298 >> 2),
316         0x00000000,
317         (0x0e00 << 16) | (0xc29c >> 2),
318         0x00000000,
319         (0x0e00 << 16) | (0xc2a0 >> 2),
320         0x00000000,
321         (0x0e00 << 16) | (0xc2a4 >> 2),
322         0x00000000,
323         (0x0e00 << 16) | (0xc2a8 >> 2),
324         0x00000000,
325         (0x0e00 << 16) | (0xc2ac  >> 2),
326         0x00000000,
327         (0x0e00 << 16) | (0xc2b0 >> 2),
328         0x00000000,
329         (0x0e00 << 16) | (0x301d0 >> 2),
330         0x00000000,
331         (0x0e00 << 16) | (0x30238 >> 2),
332         0x00000000,
333         (0x0e00 << 16) | (0x30250 >> 2),
334         0x00000000,
335         (0x0e00 << 16) | (0x30254 >> 2),
336         0x00000000,
337         (0x0e00 << 16) | (0x30258 >> 2),
338         0x00000000,
339         (0x0e00 << 16) | (0x3025c >> 2),
340         0x00000000,
341         (0x4e00 << 16) | (0xc900 >> 2),
342         0x00000000,
343         (0x5e00 << 16) | (0xc900 >> 2),
344         0x00000000,
345         (0x6e00 << 16) | (0xc900 >> 2),
346         0x00000000,
347         (0x7e00 << 16) | (0xc900 >> 2),
348         0x00000000,
349         (0x8e00 << 16) | (0xc900 >> 2),
350         0x00000000,
351         (0x9e00 << 16) | (0xc900 >> 2),
352         0x00000000,
353         (0xae00 << 16) | (0xc900 >> 2),
354         0x00000000,
355         (0xbe00 << 16) | (0xc900 >> 2),
356         0x00000000,
357         (0x4e00 << 16) | (0xc904 >> 2),
358         0x00000000,
359         (0x5e00 << 16) | (0xc904 >> 2),
360         0x00000000,
361         (0x6e00 << 16) | (0xc904 >> 2),
362         0x00000000,
363         (0x7e00 << 16) | (0xc904 >> 2),
364         0x00000000,
365         (0x8e00 << 16) | (0xc904 >> 2),
366         0x00000000,
367         (0x9e00 << 16) | (0xc904 >> 2),
368         0x00000000,
369         (0xae00 << 16) | (0xc904 >> 2),
370         0x00000000,
371         (0xbe00 << 16) | (0xc904 >> 2),
372         0x00000000,
373         (0x4e00 << 16) | (0xc908 >> 2),
374         0x00000000,
375         (0x5e00 << 16) | (0xc908 >> 2),
376         0x00000000,
377         (0x6e00 << 16) | (0xc908 >> 2),
378         0x00000000,
379         (0x7e00 << 16) | (0xc908 >> 2),
380         0x00000000,
381         (0x8e00 << 16) | (0xc908 >> 2),
382         0x00000000,
383         (0x9e00 << 16) | (0xc908 >> 2),
384         0x00000000,
385         (0xae00 << 16) | (0xc908 >> 2),
386         0x00000000,
387         (0xbe00 << 16) | (0xc908 >> 2),
388         0x00000000,
389         (0x4e00 << 16) | (0xc90c >> 2),
390         0x00000000,
391         (0x5e00 << 16) | (0xc90c >> 2),
392         0x00000000,
393         (0x6e00 << 16) | (0xc90c >> 2),
394         0x00000000,
395         (0x7e00 << 16) | (0xc90c >> 2),
396         0x00000000,
397         (0x8e00 << 16) | (0xc90c >> 2),
398         0x00000000,
399         (0x9e00 << 16) | (0xc90c >> 2),
400         0x00000000,
401         (0xae00 << 16) | (0xc90c >> 2),
402         0x00000000,
403         (0xbe00 << 16) | (0xc90c >> 2),
404         0x00000000,
405         (0x4e00 << 16) | (0xc910 >> 2),
406         0x00000000,
407         (0x5e00 << 16) | (0xc910 >> 2),
408         0x00000000,
409         (0x6e00 << 16) | (0xc910 >> 2),
410         0x00000000,
411         (0x7e00 << 16) | (0xc910 >> 2),
412         0x00000000,
413         (0x8e00 << 16) | (0xc910 >> 2),
414         0x00000000,
415         (0x9e00 << 16) | (0xc910 >> 2),
416         0x00000000,
417         (0xae00 << 16) | (0xc910 >> 2),
418         0x00000000,
419         (0xbe00 << 16) | (0xc910 >> 2),
420         0x00000000,
421         (0x0e00 << 16) | (0xc99c >> 2),
422         0x00000000,
423         (0x0e00 << 16) | (0x9834 >> 2),
424         0x00000000,
425         (0x0000 << 16) | (0x30f00 >> 2),
426         0x00000000,
427         (0x0001 << 16) | (0x30f00 >> 2),
428         0x00000000,
429         (0x0000 << 16) | (0x30f04 >> 2),
430         0x00000000,
431         (0x0001 << 16) | (0x30f04 >> 2),
432         0x00000000,
433         (0x0000 << 16) | (0x30f08 >> 2),
434         0x00000000,
435         (0x0001 << 16) | (0x30f08 >> 2),
436         0x00000000,
437         (0x0000 << 16) | (0x30f0c >> 2),
438         0x00000000,
439         (0x0001 << 16) | (0x30f0c >> 2),
440         0x00000000,
441         (0x0600 << 16) | (0x9b7c >> 2),
442         0x00000000,
443         (0x0e00 << 16) | (0x8a14 >> 2),
444         0x00000000,
445         (0x0e00 << 16) | (0x8a18 >> 2),
446         0x00000000,
447         (0x0600 << 16) | (0x30a00 >> 2),
448         0x00000000,
449         (0x0e00 << 16) | (0x8bf0 >> 2),
450         0x00000000,
451         (0x0e00 << 16) | (0x8bcc >> 2),
452         0x00000000,
453         (0x0e00 << 16) | (0x8b24 >> 2),
454         0x00000000,
455         (0x0e00 << 16) | (0x30a04 >> 2),
456         0x00000000,
457         (0x0600 << 16) | (0x30a10 >> 2),
458         0x00000000,
459         (0x0600 << 16) | (0x30a14 >> 2),
460         0x00000000,
461         (0x0600 << 16) | (0x30a18 >> 2),
462         0x00000000,
463         (0x0600 << 16) | (0x30a2c >> 2),
464         0x00000000,
465         (0x0e00 << 16) | (0xc700 >> 2),
466         0x00000000,
467         (0x0e00 << 16) | (0xc704 >> 2),
468         0x00000000,
469         (0x0e00 << 16) | (0xc708 >> 2),
470         0x00000000,
471         (0x0e00 << 16) | (0xc768 >> 2),
472         0x00000000,
473         (0x0400 << 16) | (0xc770 >> 2),
474         0x00000000,
475         (0x0400 << 16) | (0xc774 >> 2),
476         0x00000000,
477         (0x0400 << 16) | (0xc778 >> 2),
478         0x00000000,
479         (0x0400 << 16) | (0xc77c >> 2),
480         0x00000000,
481         (0x0400 << 16) | (0xc780 >> 2),
482         0x00000000,
483         (0x0400 << 16) | (0xc784 >> 2),
484         0x00000000,
485         (0x0400 << 16) | (0xc788 >> 2),
486         0x00000000,
487         (0x0400 << 16) | (0xc78c >> 2),
488         0x00000000,
489         (0x0400 << 16) | (0xc798 >> 2),
490         0x00000000,
491         (0x0400 << 16) | (0xc79c >> 2),
492         0x00000000,
493         (0x0400 << 16) | (0xc7a0 >> 2),
494         0x00000000,
495         (0x0400 << 16) | (0xc7a4 >> 2),
496         0x00000000,
497         (0x0400 << 16) | (0xc7a8 >> 2),
498         0x00000000,
499         (0x0400 << 16) | (0xc7ac >> 2),
500         0x00000000,
501         (0x0400 << 16) | (0xc7b0 >> 2),
502         0x00000000,
503         (0x0400 << 16) | (0xc7b4 >> 2),
504         0x00000000,
505         (0x0e00 << 16) | (0x9100 >> 2),
506         0x00000000,
507         (0x0e00 << 16) | (0x3c010 >> 2),
508         0x00000000,
509         (0x0e00 << 16) | (0x92a8 >> 2),
510         0x00000000,
511         (0x0e00 << 16) | (0x92ac >> 2),
512         0x00000000,
513         (0x0e00 << 16) | (0x92b4 >> 2),
514         0x00000000,
515         (0x0e00 << 16) | (0x92b8 >> 2),
516         0x00000000,
517         (0x0e00 << 16) | (0x92bc >> 2),
518         0x00000000,
519         (0x0e00 << 16) | (0x92c0 >> 2),
520         0x00000000,
521         (0x0e00 << 16) | (0x92c4 >> 2),
522         0x00000000,
523         (0x0e00 << 16) | (0x92c8 >> 2),
524         0x00000000,
525         (0x0e00 << 16) | (0x92cc >> 2),
526         0x00000000,
527         (0x0e00 << 16) | (0x92d0 >> 2),
528         0x00000000,
529         (0x0e00 << 16) | (0x8c00 >> 2),
530         0x00000000,
531         (0x0e00 << 16) | (0x8c04 >> 2),
532         0x00000000,
533         (0x0e00 << 16) | (0x8c20 >> 2),
534         0x00000000,
535         (0x0e00 << 16) | (0x8c38 >> 2),
536         0x00000000,
537         (0x0e00 << 16) | (0x8c3c >> 2),
538         0x00000000,
539         (0x0e00 << 16) | (0xae00 >> 2),
540         0x00000000,
541         (0x0e00 << 16) | (0x9604 >> 2),
542         0x00000000,
543         (0x0e00 << 16) | (0xac08 >> 2),
544         0x00000000,
545         (0x0e00 << 16) | (0xac0c >> 2),
546         0x00000000,
547         (0x0e00 << 16) | (0xac10 >> 2),
548         0x00000000,
549         (0x0e00 << 16) | (0xac14 >> 2),
550         0x00000000,
551         (0x0e00 << 16) | (0xac58 >> 2),
552         0x00000000,
553         (0x0e00 << 16) | (0xac68 >> 2),
554         0x00000000,
555         (0x0e00 << 16) | (0xac6c >> 2),
556         0x00000000,
557         (0x0e00 << 16) | (0xac70 >> 2),
558         0x00000000,
559         (0x0e00 << 16) | (0xac74 >> 2),
560         0x00000000,
561         (0x0e00 << 16) | (0xac78 >> 2),
562         0x00000000,
563         (0x0e00 << 16) | (0xac7c >> 2),
564         0x00000000,
565         (0x0e00 << 16) | (0xac80 >> 2),
566         0x00000000,
567         (0x0e00 << 16) | (0xac84 >> 2),
568         0x00000000,
569         (0x0e00 << 16) | (0xac88 >> 2),
570         0x00000000,
571         (0x0e00 << 16) | (0xac8c >> 2),
572         0x00000000,
573         (0x0e00 << 16) | (0x970c >> 2),
574         0x00000000,
575         (0x0e00 << 16) | (0x9714 >> 2),
576         0x00000000,
577         (0x0e00 << 16) | (0x9718 >> 2),
578         0x00000000,
579         (0x0e00 << 16) | (0x971c >> 2),
580         0x00000000,
581         (0x0e00 << 16) | (0x31068 >> 2),
582         0x00000000,
583         (0x4e00 << 16) | (0x31068 >> 2),
584         0x00000000,
585         (0x5e00 << 16) | (0x31068 >> 2),
586         0x00000000,
587         (0x6e00 << 16) | (0x31068 >> 2),
588         0x00000000,
589         (0x7e00 << 16) | (0x31068 >> 2),
590         0x00000000,
591         (0x8e00 << 16) | (0x31068 >> 2),
592         0x00000000,
593         (0x9e00 << 16) | (0x31068 >> 2),
594         0x00000000,
595         (0xae00 << 16) | (0x31068 >> 2),
596         0x00000000,
597         (0xbe00 << 16) | (0x31068 >> 2),
598         0x00000000,
599         (0x0e00 << 16) | (0xcd10 >> 2),
600         0x00000000,
601         (0x0e00 << 16) | (0xcd14 >> 2),
602         0x00000000,
603         (0x0e00 << 16) | (0x88b0 >> 2),
604         0x00000000,
605         (0x0e00 << 16) | (0x88b4 >> 2),
606         0x00000000,
607         (0x0e00 << 16) | (0x88b8 >> 2),
608         0x00000000,
609         (0x0e00 << 16) | (0x88bc >> 2),
610         0x00000000,
611         (0x0400 << 16) | (0x89c0 >> 2),
612         0x00000000,
613         (0x0e00 << 16) | (0x88c4 >> 2),
614         0x00000000,
615         (0x0e00 << 16) | (0x88c8 >> 2),
616         0x00000000,
617         (0x0e00 << 16) | (0x88d0 >> 2),
618         0x00000000,
619         (0x0e00 << 16) | (0x88d4 >> 2),
620         0x00000000,
621         (0x0e00 << 16) | (0x88d8 >> 2),
622         0x00000000,
623         (0x0e00 << 16) | (0x8980 >> 2),
624         0x00000000,
625         (0x0e00 << 16) | (0x30938 >> 2),
626         0x00000000,
627         (0x0e00 << 16) | (0x3093c >> 2),
628         0x00000000,
629         (0x0e00 << 16) | (0x30940 >> 2),
630         0x00000000,
631         (0x0e00 << 16) | (0x89a0 >> 2),
632         0x00000000,
633         (0x0e00 << 16) | (0x30900 >> 2),
634         0x00000000,
635         (0x0e00 << 16) | (0x30904 >> 2),
636         0x00000000,
637         (0x0e00 << 16) | (0x89b4 >> 2),
638         0x00000000,
639         (0x0e00 << 16) | (0x3c210 >> 2),
640         0x00000000,
641         (0x0e00 << 16) | (0x3c214 >> 2),
642         0x00000000,
643         (0x0e00 << 16) | (0x3c218 >> 2),
644         0x00000000,
645         (0x0e00 << 16) | (0x8904 >> 2),
646         0x00000000,
647         0x5,
648         (0x0e00 << 16) | (0x8c28 >> 2),
649         (0x0e00 << 16) | (0x8c2c >> 2),
650         (0x0e00 << 16) | (0x8c30 >> 2),
651         (0x0e00 << 16) | (0x8c34 >> 2),
652         (0x0e00 << 16) | (0x9600 >> 2),
653 };
654
655 static const u32 kalindi_rlc_save_restore_register_list[] =
656 {
657         (0x0e00 << 16) | (0xc12c >> 2),
658         0x00000000,
659         (0x0e00 << 16) | (0xc140 >> 2),
660         0x00000000,
661         (0x0e00 << 16) | (0xc150 >> 2),
662         0x00000000,
663         (0x0e00 << 16) | (0xc15c >> 2),
664         0x00000000,
665         (0x0e00 << 16) | (0xc168 >> 2),
666         0x00000000,
667         (0x0e00 << 16) | (0xc170 >> 2),
668         0x00000000,
669         (0x0e00 << 16) | (0xc204 >> 2),
670         0x00000000,
671         (0x0e00 << 16) | (0xc2b4 >> 2),
672         0x00000000,
673         (0x0e00 << 16) | (0xc2b8 >> 2),
674         0x00000000,
675         (0x0e00 << 16) | (0xc2bc >> 2),
676         0x00000000,
677         (0x0e00 << 16) | (0xc2c0 >> 2),
678         0x00000000,
679         (0x0e00 << 16) | (0x8228 >> 2),
680         0x00000000,
681         (0x0e00 << 16) | (0x829c >> 2),
682         0x00000000,
683         (0x0e00 << 16) | (0x869c >> 2),
684         0x00000000,
685         (0x0600 << 16) | (0x98f4 >> 2),
686         0x00000000,
687         (0x0e00 << 16) | (0x98f8 >> 2),
688         0x00000000,
689         (0x0e00 << 16) | (0x9900 >> 2),
690         0x00000000,
691         (0x0e00 << 16) | (0xc260 >> 2),
692         0x00000000,
693         (0x0e00 << 16) | (0x90e8 >> 2),
694         0x00000000,
695         (0x0e00 << 16) | (0x3c000 >> 2),
696         0x00000000,
697         (0x0e00 << 16) | (0x3c00c >> 2),
698         0x00000000,
699         (0x0e00 << 16) | (0x8c1c >> 2),
700         0x00000000,
701         (0x0e00 << 16) | (0x9700 >> 2),
702         0x00000000,
703         (0x0e00 << 16) | (0xcd20 >> 2),
704         0x00000000,
705         (0x4e00 << 16) | (0xcd20 >> 2),
706         0x00000000,
707         (0x5e00 << 16) | (0xcd20 >> 2),
708         0x00000000,
709         (0x6e00 << 16) | (0xcd20 >> 2),
710         0x00000000,
711         (0x7e00 << 16) | (0xcd20 >> 2),
712         0x00000000,
713         (0x0e00 << 16) | (0x89bc >> 2),
714         0x00000000,
715         (0x0e00 << 16) | (0x8900 >> 2),
716         0x00000000,
717         0x3,
718         (0x0e00 << 16) | (0xc130 >> 2),
719         0x00000000,
720         (0x0e00 << 16) | (0xc134 >> 2),
721         0x00000000,
722         (0x0e00 << 16) | (0xc1fc >> 2),
723         0x00000000,
724         (0x0e00 << 16) | (0xc208 >> 2),
725         0x00000000,
726         (0x0e00 << 16) | (0xc264 >> 2),
727         0x00000000,
728         (0x0e00 << 16) | (0xc268 >> 2),
729         0x00000000,
730         (0x0e00 << 16) | (0xc26c >> 2),
731         0x00000000,
732         (0x0e00 << 16) | (0xc270 >> 2),
733         0x00000000,
734         (0x0e00 << 16) | (0xc274 >> 2),
735         0x00000000,
736         (0x0e00 << 16) | (0xc28c >> 2),
737         0x00000000,
738         (0x0e00 << 16) | (0xc290 >> 2),
739         0x00000000,
740         (0x0e00 << 16) | (0xc294 >> 2),
741         0x00000000,
742         (0x0e00 << 16) | (0xc298 >> 2),
743         0x00000000,
744         (0x0e00 << 16) | (0xc2a0 >> 2),
745         0x00000000,
746         (0x0e00 << 16) | (0xc2a4 >> 2),
747         0x00000000,
748         (0x0e00 << 16) | (0xc2a8 >> 2),
749         0x00000000,
750         (0x0e00 << 16) | (0xc2ac >> 2),
751         0x00000000,
752         (0x0e00 << 16) | (0x301d0 >> 2),
753         0x00000000,
754         (0x0e00 << 16) | (0x30238 >> 2),
755         0x00000000,
756         (0x0e00 << 16) | (0x30250 >> 2),
757         0x00000000,
758         (0x0e00 << 16) | (0x30254 >> 2),
759         0x00000000,
760         (0x0e00 << 16) | (0x30258 >> 2),
761         0x00000000,
762         (0x0e00 << 16) | (0x3025c >> 2),
763         0x00000000,
764         (0x4e00 << 16) | (0xc900 >> 2),
765         0x00000000,
766         (0x5e00 << 16) | (0xc900 >> 2),
767         0x00000000,
768         (0x6e00 << 16) | (0xc900 >> 2),
769         0x00000000,
770         (0x7e00 << 16) | (0xc900 >> 2),
771         0x00000000,
772         (0x4e00 << 16) | (0xc904 >> 2),
773         0x00000000,
774         (0x5e00 << 16) | (0xc904 >> 2),
775         0x00000000,
776         (0x6e00 << 16) | (0xc904 >> 2),
777         0x00000000,
778         (0x7e00 << 16) | (0xc904 >> 2),
779         0x00000000,
780         (0x4e00 << 16) | (0xc908 >> 2),
781         0x00000000,
782         (0x5e00 << 16) | (0xc908 >> 2),
783         0x00000000,
784         (0x6e00 << 16) | (0xc908 >> 2),
785         0x00000000,
786         (0x7e00 << 16) | (0xc908 >> 2),
787         0x00000000,
788         (0x4e00 << 16) | (0xc90c >> 2),
789         0x00000000,
790         (0x5e00 << 16) | (0xc90c >> 2),
791         0x00000000,
792         (0x6e00 << 16) | (0xc90c >> 2),
793         0x00000000,
794         (0x7e00 << 16) | (0xc90c >> 2),
795         0x00000000,
796         (0x4e00 << 16) | (0xc910 >> 2),
797         0x00000000,
798         (0x5e00 << 16) | (0xc910 >> 2),
799         0x00000000,
800         (0x6e00 << 16) | (0xc910 >> 2),
801         0x00000000,
802         (0x7e00 << 16) | (0xc910 >> 2),
803         0x00000000,
804         (0x0e00 << 16) | (0xc99c >> 2),
805         0x00000000,
806         (0x0e00 << 16) | (0x9834 >> 2),
807         0x00000000,
808         (0x0000 << 16) | (0x30f00 >> 2),
809         0x00000000,
810         (0x0000 << 16) | (0x30f04 >> 2),
811         0x00000000,
812         (0x0000 << 16) | (0x30f08 >> 2),
813         0x00000000,
814         (0x0000 << 16) | (0x30f0c >> 2),
815         0x00000000,
816         (0x0600 << 16) | (0x9b7c >> 2),
817         0x00000000,
818         (0x0e00 << 16) | (0x8a14 >> 2),
819         0x00000000,
820         (0x0e00 << 16) | (0x8a18 >> 2),
821         0x00000000,
822         (0x0600 << 16) | (0x30a00 >> 2),
823         0x00000000,
824         (0x0e00 << 16) | (0x8bf0 >> 2),
825         0x00000000,
826         (0x0e00 << 16) | (0x8bcc >> 2),
827         0x00000000,
828         (0x0e00 << 16) | (0x8b24 >> 2),
829         0x00000000,
830         (0x0e00 << 16) | (0x30a04 >> 2),
831         0x00000000,
832         (0x0600 << 16) | (0x30a10 >> 2),
833         0x00000000,
834         (0x0600 << 16) | (0x30a14 >> 2),
835         0x00000000,
836         (0x0600 << 16) | (0x30a18 >> 2),
837         0x00000000,
838         (0x0600 << 16) | (0x30a2c >> 2),
839         0x00000000,
840         (0x0e00 << 16) | (0xc700 >> 2),
841         0x00000000,
842         (0x0e00 << 16) | (0xc704 >> 2),
843         0x00000000,
844         (0x0e00 << 16) | (0xc708 >> 2),
845         0x00000000,
846         (0x0e00 << 16) | (0xc768 >> 2),
847         0x00000000,
848         (0x0400 << 16) | (0xc770 >> 2),
849         0x00000000,
850         (0x0400 << 16) | (0xc774 >> 2),
851         0x00000000,
852         (0x0400 << 16) | (0xc798 >> 2),
853         0x00000000,
854         (0x0400 << 16) | (0xc79c >> 2),
855         0x00000000,
856         (0x0e00 << 16) | (0x9100 >> 2),
857         0x00000000,
858         (0x0e00 << 16) | (0x3c010 >> 2),
859         0x00000000,
860         (0x0e00 << 16) | (0x8c00 >> 2),
861         0x00000000,
862         (0x0e00 << 16) | (0x8c04 >> 2),
863         0x00000000,
864         (0x0e00 << 16) | (0x8c20 >> 2),
865         0x00000000,
866         (0x0e00 << 16) | (0x8c38 >> 2),
867         0x00000000,
868         (0x0e00 << 16) | (0x8c3c >> 2),
869         0x00000000,
870         (0x0e00 << 16) | (0xae00 >> 2),
871         0x00000000,
872         (0x0e00 << 16) | (0x9604 >> 2),
873         0x00000000,
874         (0x0e00 << 16) | (0xac08 >> 2),
875         0x00000000,
876         (0x0e00 << 16) | (0xac0c >> 2),
877         0x00000000,
878         (0x0e00 << 16) | (0xac10 >> 2),
879         0x00000000,
880         (0x0e00 << 16) | (0xac14 >> 2),
881         0x00000000,
882         (0x0e00 << 16) | (0xac58 >> 2),
883         0x00000000,
884         (0x0e00 << 16) | (0xac68 >> 2),
885         0x00000000,
886         (0x0e00 << 16) | (0xac6c >> 2),
887         0x00000000,
888         (0x0e00 << 16) | (0xac70 >> 2),
889         0x00000000,
890         (0x0e00 << 16) | (0xac74 >> 2),
891         0x00000000,
892         (0x0e00 << 16) | (0xac78 >> 2),
893         0x00000000,
894         (0x0e00 << 16) | (0xac7c >> 2),
895         0x00000000,
896         (0x0e00 << 16) | (0xac80 >> 2),
897         0x00000000,
898         (0x0e00 << 16) | (0xac84 >> 2),
899         0x00000000,
900         (0x0e00 << 16) | (0xac88 >> 2),
901         0x00000000,
902         (0x0e00 << 16) | (0xac8c >> 2),
903         0x00000000,
904         (0x0e00 << 16) | (0x970c >> 2),
905         0x00000000,
906         (0x0e00 << 16) | (0x9714 >> 2),
907         0x00000000,
908         (0x0e00 << 16) | (0x9718 >> 2),
909         0x00000000,
910         (0x0e00 << 16) | (0x971c >> 2),
911         0x00000000,
912         (0x0e00 << 16) | (0x31068 >> 2),
913         0x00000000,
914         (0x4e00 << 16) | (0x31068 >> 2),
915         0x00000000,
916         (0x5e00 << 16) | (0x31068 >> 2),
917         0x00000000,
918         (0x6e00 << 16) | (0x31068 >> 2),
919         0x00000000,
920         (0x7e00 << 16) | (0x31068 >> 2),
921         0x00000000,
922         (0x0e00 << 16) | (0xcd10 >> 2),
923         0x00000000,
924         (0x0e00 << 16) | (0xcd14 >> 2),
925         0x00000000,
926         (0x0e00 << 16) | (0x88b0 >> 2),
927         0x00000000,
928         (0x0e00 << 16) | (0x88b4 >> 2),
929         0x00000000,
930         (0x0e00 << 16) | (0x88b8 >> 2),
931         0x00000000,
932         (0x0e00 << 16) | (0x88bc >> 2),
933         0x00000000,
934         (0x0400 << 16) | (0x89c0 >> 2),
935         0x00000000,
936         (0x0e00 << 16) | (0x88c4 >> 2),
937         0x00000000,
938         (0x0e00 << 16) | (0x88c8 >> 2),
939         0x00000000,
940         (0x0e00 << 16) | (0x88d0 >> 2),
941         0x00000000,
942         (0x0e00 << 16) | (0x88d4 >> 2),
943         0x00000000,
944         (0x0e00 << 16) | (0x88d8 >> 2),
945         0x00000000,
946         (0x0e00 << 16) | (0x8980 >> 2),
947         0x00000000,
948         (0x0e00 << 16) | (0x30938 >> 2),
949         0x00000000,
950         (0x0e00 << 16) | (0x3093c >> 2),
951         0x00000000,
952         (0x0e00 << 16) | (0x30940 >> 2),
953         0x00000000,
954         (0x0e00 << 16) | (0x89a0 >> 2),
955         0x00000000,
956         (0x0e00 << 16) | (0x30900 >> 2),
957         0x00000000,
958         (0x0e00 << 16) | (0x30904 >> 2),
959         0x00000000,
960         (0x0e00 << 16) | (0x89b4 >> 2),
961         0x00000000,
962         (0x0e00 << 16) | (0x3e1fc >> 2),
963         0x00000000,
964         (0x0e00 << 16) | (0x3c210 >> 2),
965         0x00000000,
966         (0x0e00 << 16) | (0x3c214 >> 2),
967         0x00000000,
968         (0x0e00 << 16) | (0x3c218 >> 2),
969         0x00000000,
970         (0x0e00 << 16) | (0x8904 >> 2),
971         0x00000000,
972         0x5,
973         (0x0e00 << 16) | (0x8c28 >> 2),
974         (0x0e00 << 16) | (0x8c2c >> 2),
975         (0x0e00 << 16) | (0x8c30 >> 2),
976         (0x0e00 << 16) | (0x8c34 >> 2),
977         (0x0e00 << 16) | (0x9600 >> 2),
978 };
979
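/*
 * Golden register tables: each entry is a {reg, and_mask, or_mask} triplet
 * applied by radeon_program_register_sequence() from
 * cik_init_golden_registers() below.  When and_mask is 0xffffffff the
 * register is simply written with or_mask; otherwise the update is roughly:
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_mask;
 *	WREG32(reg, tmp);
 */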
980 static const u32 bonaire_golden_spm_registers[] =
981 {
982         0x30800, 0xe0ffffff, 0xe0000000
983 };
984
985 static const u32 bonaire_golden_common_registers[] =
986 {
987         0xc770, 0xffffffff, 0x00000800,
988         0xc774, 0xffffffff, 0x00000800,
989         0xc798, 0xffffffff, 0x00007fbf,
990         0xc79c, 0xffffffff, 0x00007faf
991 };
992
993 static const u32 bonaire_golden_registers[] =
994 {
995         0x3354, 0x00000333, 0x00000333,
996         0x3350, 0x000c0fc0, 0x00040200,
997         0x9a10, 0x00010000, 0x00058208,
998         0x3c000, 0xffff1fff, 0x00140000,
999         0x3c200, 0xfdfc0fff, 0x00000100,
1000         0x3c234, 0x40000000, 0x40000200,
1001         0x9830, 0xffffffff, 0x00000000,
1002         0x9834, 0xf00fffff, 0x00000400,
1003         0x9838, 0x0002021c, 0x00020200,
1004         0xc78, 0x00000080, 0x00000000,
1005         0x5bb0, 0x000000f0, 0x00000070,
1006         0x5bc0, 0xf0311fff, 0x80300000,
1007         0x98f8, 0x73773777, 0x12010001,
1008         0x350c, 0x00810000, 0x408af000,
1009         0x7030, 0x31000111, 0x00000011,
1010         0x2f48, 0x73773777, 0x12010001,
1011         0x220c, 0x00007fb6, 0x0021a1b1,
1012         0x2210, 0x00007fb6, 0x002021b1,
1013         0x2180, 0x00007fb6, 0x00002191,
1014         0x2218, 0x00007fb6, 0x002121b1,
1015         0x221c, 0x00007fb6, 0x002021b1,
1016         0x21dc, 0x00007fb6, 0x00002191,
1017         0x21e0, 0x00007fb6, 0x00002191,
1018         0x3628, 0x0000003f, 0x0000000a,
1019         0x362c, 0x0000003f, 0x0000000a,
1020         0x2ae4, 0x00073ffe, 0x000022a2,
1021         0x240c, 0x000007ff, 0x00000000,
1022         0x8a14, 0xf000003f, 0x00000007,
1023         0x8bf0, 0x00002001, 0x00000001,
1024         0x8b24, 0xffffffff, 0x00ffffff,
1025         0x30a04, 0x0000ff0f, 0x00000000,
1026         0x28a4c, 0x07ffffff, 0x06000000,
1027         0x4d8, 0x00000fff, 0x00000100,
1028         0x3e78, 0x00000001, 0x00000002,
1029         0x9100, 0x03000000, 0x0362c688,
1030         0x8c00, 0x000000ff, 0x00000001,
1031         0xe40, 0x00001fff, 0x00001fff,
1032         0x9060, 0x0000007f, 0x00000020,
1033         0x9508, 0x00010000, 0x00010000,
1034         0xac14, 0x000003ff, 0x000000f3,
1035         0xac0c, 0xffffffff, 0x00001032
1036 };
1037
1038 static const u32 bonaire_mgcg_cgcg_init[] =
1039 {
1040         0xc420, 0xffffffff, 0xfffffffc,
1041         0x30800, 0xffffffff, 0xe0000000,
1042         0x3c2a0, 0xffffffff, 0x00000100,
1043         0x3c208, 0xffffffff, 0x00000100,
1044         0x3c2c0, 0xffffffff, 0xc0000100,
1045         0x3c2c8, 0xffffffff, 0xc0000100,
1046         0x3c2c4, 0xffffffff, 0xc0000100,
1047         0x55e4, 0xffffffff, 0x00600100,
1048         0x3c280, 0xffffffff, 0x00000100,
1049         0x3c214, 0xffffffff, 0x06000100,
1050         0x3c220, 0xffffffff, 0x00000100,
1051         0x3c218, 0xffffffff, 0x06000100,
1052         0x3c204, 0xffffffff, 0x00000100,
1053         0x3c2e0, 0xffffffff, 0x00000100,
1054         0x3c224, 0xffffffff, 0x00000100,
1055         0x3c200, 0xffffffff, 0x00000100,
1056         0x3c230, 0xffffffff, 0x00000100,
1057         0x3c234, 0xffffffff, 0x00000100,
1058         0x3c250, 0xffffffff, 0x00000100,
1059         0x3c254, 0xffffffff, 0x00000100,
1060         0x3c258, 0xffffffff, 0x00000100,
1061         0x3c25c, 0xffffffff, 0x00000100,
1062         0x3c260, 0xffffffff, 0x00000100,
1063         0x3c27c, 0xffffffff, 0x00000100,
1064         0x3c278, 0xffffffff, 0x00000100,
1065         0x3c210, 0xffffffff, 0x06000100,
1066         0x3c290, 0xffffffff, 0x00000100,
1067         0x3c274, 0xffffffff, 0x00000100,
1068         0x3c2b4, 0xffffffff, 0x00000100,
1069         0x3c2b0, 0xffffffff, 0x00000100,
1070         0x3c270, 0xffffffff, 0x00000100,
1071         0x30800, 0xffffffff, 0xe0000000,
1072         0x3c020, 0xffffffff, 0x00010000,
1073         0x3c024, 0xffffffff, 0x00030002,
1074         0x3c028, 0xffffffff, 0x00040007,
1075         0x3c02c, 0xffffffff, 0x00060005,
1076         0x3c030, 0xffffffff, 0x00090008,
1077         0x3c034, 0xffffffff, 0x00010000,
1078         0x3c038, 0xffffffff, 0x00030002,
1079         0x3c03c, 0xffffffff, 0x00040007,
1080         0x3c040, 0xffffffff, 0x00060005,
1081         0x3c044, 0xffffffff, 0x00090008,
1082         0x3c048, 0xffffffff, 0x00010000,
1083         0x3c04c, 0xffffffff, 0x00030002,
1084         0x3c050, 0xffffffff, 0x00040007,
1085         0x3c054, 0xffffffff, 0x00060005,
1086         0x3c058, 0xffffffff, 0x00090008,
1087         0x3c05c, 0xffffffff, 0x00010000,
1088         0x3c060, 0xffffffff, 0x00030002,
1089         0x3c064, 0xffffffff, 0x00040007,
1090         0x3c068, 0xffffffff, 0x00060005,
1091         0x3c06c, 0xffffffff, 0x00090008,
1092         0x3c070, 0xffffffff, 0x00010000,
1093         0x3c074, 0xffffffff, 0x00030002,
1094         0x3c078, 0xffffffff, 0x00040007,
1095         0x3c07c, 0xffffffff, 0x00060005,
1096         0x3c080, 0xffffffff, 0x00090008,
1097         0x3c084, 0xffffffff, 0x00010000,
1098         0x3c088, 0xffffffff, 0x00030002,
1099         0x3c08c, 0xffffffff, 0x00040007,
1100         0x3c090, 0xffffffff, 0x00060005,
1101         0x3c094, 0xffffffff, 0x00090008,
1102         0x3c098, 0xffffffff, 0x00010000,
1103         0x3c09c, 0xffffffff, 0x00030002,
1104         0x3c0a0, 0xffffffff, 0x00040007,
1105         0x3c0a4, 0xffffffff, 0x00060005,
1106         0x3c0a8, 0xffffffff, 0x00090008,
1107         0x3c000, 0xffffffff, 0x96e00200,
1108         0x8708, 0xffffffff, 0x00900100,
1109         0xc424, 0xffffffff, 0x0020003f,
1110         0x38, 0xffffffff, 0x0140001c,
1111         0x3c, 0x000f0000, 0x000f0000,
1112         0x220, 0xffffffff, 0xC060000C,
1113         0x224, 0xc0000fff, 0x00000100,
1114         0xf90, 0xffffffff, 0x00000100,
1115         0xf98, 0x00000101, 0x00000000,
1116         0x20a8, 0xffffffff, 0x00000104,
1117         0x55e4, 0xff000fff, 0x00000100,
1118         0x30cc, 0xc0000fff, 0x00000104,
1119         0xc1e4, 0x00000001, 0x00000001,
1120         0xd00c, 0xff000ff0, 0x00000100,
1121         0xd80c, 0xff000ff0, 0x00000100
1122 };
1123
1124 static const u32 spectre_golden_spm_registers[] =
1125 {
1126         0x30800, 0xe0ffffff, 0xe0000000
1127 };
1128
1129 static const u32 spectre_golden_common_registers[] =
1130 {
1131         0xc770, 0xffffffff, 0x00000800,
1132         0xc774, 0xffffffff, 0x00000800,
1133         0xc798, 0xffffffff, 0x00007fbf,
1134         0xc79c, 0xffffffff, 0x00007faf
1135 };
1136
1137 static const u32 spectre_golden_registers[] =
1138 {
1139         0x3c000, 0xffff1fff, 0x96940200,
1140         0x3c00c, 0xffff0001, 0xff000000,
1141         0x3c200, 0xfffc0fff, 0x00000100,
1142         0x6ed8, 0x00010101, 0x00010000,
1143         0x9834, 0xf00fffff, 0x00000400,
1144         0x9838, 0xfffffffc, 0x00020200,
1145         0x5bb0, 0x000000f0, 0x00000070,
1146         0x5bc0, 0xf0311fff, 0x80300000,
1147         0x98f8, 0x73773777, 0x12010001,
1148         0x9b7c, 0x00ff0000, 0x00fc0000,
1149         0x2f48, 0x73773777, 0x12010001,
1150         0x8a14, 0xf000003f, 0x00000007,
1151         0x8b24, 0xffffffff, 0x00ffffff,
1152         0x28350, 0x3f3f3fff, 0x00000082,
1153         0x28354, 0x0000003f, 0x00000000,
1154         0x3e78, 0x00000001, 0x00000002,
1155         0x913c, 0xffff03df, 0x00000004,
1156         0xc768, 0x00000008, 0x00000008,
1157         0x8c00, 0x000008ff, 0x00000800,
1158         0x9508, 0x00010000, 0x00010000,
1159         0xac0c, 0xffffffff, 0x54763210,
1160         0x214f8, 0x01ff01ff, 0x00000002,
1161         0x21498, 0x007ff800, 0x00200000,
1162         0x2015c, 0xffffffff, 0x00000f40,
1163         0x30934, 0xffffffff, 0x00000001
1164 };
1165
1166 static const u32 spectre_mgcg_cgcg_init[] =
1167 {
1168         0xc420, 0xffffffff, 0xfffffffc,
1169         0x30800, 0xffffffff, 0xe0000000,
1170         0x3c2a0, 0xffffffff, 0x00000100,
1171         0x3c208, 0xffffffff, 0x00000100,
1172         0x3c2c0, 0xffffffff, 0x00000100,
1173         0x3c2c8, 0xffffffff, 0x00000100,
1174         0x3c2c4, 0xffffffff, 0x00000100,
1175         0x55e4, 0xffffffff, 0x00600100,
1176         0x3c280, 0xffffffff, 0x00000100,
1177         0x3c214, 0xffffffff, 0x06000100,
1178         0x3c220, 0xffffffff, 0x00000100,
1179         0x3c218, 0xffffffff, 0x06000100,
1180         0x3c204, 0xffffffff, 0x00000100,
1181         0x3c2e0, 0xffffffff, 0x00000100,
1182         0x3c224, 0xffffffff, 0x00000100,
1183         0x3c200, 0xffffffff, 0x00000100,
1184         0x3c230, 0xffffffff, 0x00000100,
1185         0x3c234, 0xffffffff, 0x00000100,
1186         0x3c250, 0xffffffff, 0x00000100,
1187         0x3c254, 0xffffffff, 0x00000100,
1188         0x3c258, 0xffffffff, 0x00000100,
1189         0x3c25c, 0xffffffff, 0x00000100,
1190         0x3c260, 0xffffffff, 0x00000100,
1191         0x3c27c, 0xffffffff, 0x00000100,
1192         0x3c278, 0xffffffff, 0x00000100,
1193         0x3c210, 0xffffffff, 0x06000100,
1194         0x3c290, 0xffffffff, 0x00000100,
1195         0x3c274, 0xffffffff, 0x00000100,
1196         0x3c2b4, 0xffffffff, 0x00000100,
1197         0x3c2b0, 0xffffffff, 0x00000100,
1198         0x3c270, 0xffffffff, 0x00000100,
1199         0x30800, 0xffffffff, 0xe0000000,
1200         0x3c020, 0xffffffff, 0x00010000,
1201         0x3c024, 0xffffffff, 0x00030002,
1202         0x3c028, 0xffffffff, 0x00040007,
1203         0x3c02c, 0xffffffff, 0x00060005,
1204         0x3c030, 0xffffffff, 0x00090008,
1205         0x3c034, 0xffffffff, 0x00010000,
1206         0x3c038, 0xffffffff, 0x00030002,
1207         0x3c03c, 0xffffffff, 0x00040007,
1208         0x3c040, 0xffffffff, 0x00060005,
1209         0x3c044, 0xffffffff, 0x00090008,
1210         0x3c048, 0xffffffff, 0x00010000,
1211         0x3c04c, 0xffffffff, 0x00030002,
1212         0x3c050, 0xffffffff, 0x00040007,
1213         0x3c054, 0xffffffff, 0x00060005,
1214         0x3c058, 0xffffffff, 0x00090008,
1215         0x3c05c, 0xffffffff, 0x00010000,
1216         0x3c060, 0xffffffff, 0x00030002,
1217         0x3c064, 0xffffffff, 0x00040007,
1218         0x3c068, 0xffffffff, 0x00060005,
1219         0x3c06c, 0xffffffff, 0x00090008,
1220         0x3c070, 0xffffffff, 0x00010000,
1221         0x3c074, 0xffffffff, 0x00030002,
1222         0x3c078, 0xffffffff, 0x00040007,
1223         0x3c07c, 0xffffffff, 0x00060005,
1224         0x3c080, 0xffffffff, 0x00090008,
1225         0x3c084, 0xffffffff, 0x00010000,
1226         0x3c088, 0xffffffff, 0x00030002,
1227         0x3c08c, 0xffffffff, 0x00040007,
1228         0x3c090, 0xffffffff, 0x00060005,
1229         0x3c094, 0xffffffff, 0x00090008,
1230         0x3c098, 0xffffffff, 0x00010000,
1231         0x3c09c, 0xffffffff, 0x00030002,
1232         0x3c0a0, 0xffffffff, 0x00040007,
1233         0x3c0a4, 0xffffffff, 0x00060005,
1234         0x3c0a8, 0xffffffff, 0x00090008,
1235         0x3c0ac, 0xffffffff, 0x00010000,
1236         0x3c0b0, 0xffffffff, 0x00030002,
1237         0x3c0b4, 0xffffffff, 0x00040007,
1238         0x3c0b8, 0xffffffff, 0x00060005,
1239         0x3c0bc, 0xffffffff, 0x00090008,
1240         0x3c000, 0xffffffff, 0x96e00200,
1241         0x8708, 0xffffffff, 0x00900100,
1242         0xc424, 0xffffffff, 0x0020003f,
1243         0x38, 0xffffffff, 0x0140001c,
1244         0x3c, 0x000f0000, 0x000f0000,
1245         0x220, 0xffffffff, 0xC060000C,
1246         0x224, 0xc0000fff, 0x00000100,
1247         0xf90, 0xffffffff, 0x00000100,
1248         0xf98, 0x00000101, 0x00000000,
1249         0x20a8, 0xffffffff, 0x00000104,
1250         0x55e4, 0xff000fff, 0x00000100,
1251         0x30cc, 0xc0000fff, 0x00000104,
1252         0xc1e4, 0x00000001, 0x00000001,
1253         0xd00c, 0xff000ff0, 0x00000100,
1254         0xd80c, 0xff000ff0, 0x00000100
1255 };
1256
1257 static const u32 kalindi_golden_spm_registers[] =
1258 {
1259         0x30800, 0xe0ffffff, 0xe0000000
1260 };
1261
1262 static const u32 kalindi_golden_common_registers[] =
1263 {
1264         0xc770, 0xffffffff, 0x00000800,
1265         0xc774, 0xffffffff, 0x00000800,
1266         0xc798, 0xffffffff, 0x00007fbf,
1267         0xc79c, 0xffffffff, 0x00007faf
1268 };
1269
1270 static const u32 kalindi_golden_registers[] =
1271 {
1272         0x3c000, 0xffffdfff, 0x6e944040,
1273         0x55e4, 0xff607fff, 0xfc000100,
1274         0x3c220, 0xff000fff, 0x00000100,
1275         0x3c224, 0xff000fff, 0x00000100,
1276         0x3c200, 0xfffc0fff, 0x00000100,
1277         0x6ed8, 0x00010101, 0x00010000,
1278         0x9830, 0xffffffff, 0x00000000,
1279         0x9834, 0xf00fffff, 0x00000400,
1280         0x5bb0, 0x000000f0, 0x00000070,
1281         0x5bc0, 0xf0311fff, 0x80300000,
1282         0x98f8, 0x73773777, 0x12010001,
1283         0x98fc, 0xffffffff, 0x00000010,
1284         0x9b7c, 0x00ff0000, 0x00fc0000,
1285         0x8030, 0x00001f0f, 0x0000100a,
1286         0x2f48, 0x73773777, 0x12010001,
1287         0x2408, 0x000fffff, 0x000c007f,
1288         0x8a14, 0xf000003f, 0x00000007,
1289         0x8b24, 0x3fff3fff, 0x00ffcfff,
1290         0x30a04, 0x0000ff0f, 0x00000000,
1291         0x28a4c, 0x07ffffff, 0x06000000,
1292         0x4d8, 0x00000fff, 0x00000100,
1293         0x3e78, 0x00000001, 0x00000002,
1294         0xc768, 0x00000008, 0x00000008,
1295         0x8c00, 0x000000ff, 0x00000003,
1296         0x214f8, 0x01ff01ff, 0x00000002,
1297         0x21498, 0x007ff800, 0x00200000,
1298         0x2015c, 0xffffffff, 0x00000f40,
1299         0x88c4, 0x001f3ae3, 0x00000082,
1300         0x88d4, 0x0000001f, 0x00000010,
1301         0x30934, 0xffffffff, 0x00000000
1302 };
1303
1304 static const u32 kalindi_mgcg_cgcg_init[] =
1305 {
1306         0xc420, 0xffffffff, 0xfffffffc,
1307         0x30800, 0xffffffff, 0xe0000000,
1308         0x3c2a0, 0xffffffff, 0x00000100,
1309         0x3c208, 0xffffffff, 0x00000100,
1310         0x3c2c0, 0xffffffff, 0x00000100,
1311         0x3c2c8, 0xffffffff, 0x00000100,
1312         0x3c2c4, 0xffffffff, 0x00000100,
1313         0x55e4, 0xffffffff, 0x00600100,
1314         0x3c280, 0xffffffff, 0x00000100,
1315         0x3c214, 0xffffffff, 0x06000100,
1316         0x3c220, 0xffffffff, 0x00000100,
1317         0x3c218, 0xffffffff, 0x06000100,
1318         0x3c204, 0xffffffff, 0x00000100,
1319         0x3c2e0, 0xffffffff, 0x00000100,
1320         0x3c224, 0xffffffff, 0x00000100,
1321         0x3c200, 0xffffffff, 0x00000100,
1322         0x3c230, 0xffffffff, 0x00000100,
1323         0x3c234, 0xffffffff, 0x00000100,
1324         0x3c250, 0xffffffff, 0x00000100,
1325         0x3c254, 0xffffffff, 0x00000100,
1326         0x3c258, 0xffffffff, 0x00000100,
1327         0x3c25c, 0xffffffff, 0x00000100,
1328         0x3c260, 0xffffffff, 0x00000100,
1329         0x3c27c, 0xffffffff, 0x00000100,
1330         0x3c278, 0xffffffff, 0x00000100,
1331         0x3c210, 0xffffffff, 0x06000100,
1332         0x3c290, 0xffffffff, 0x00000100,
1333         0x3c274, 0xffffffff, 0x00000100,
1334         0x3c2b4, 0xffffffff, 0x00000100,
1335         0x3c2b0, 0xffffffff, 0x00000100,
1336         0x3c270, 0xffffffff, 0x00000100,
1337         0x30800, 0xffffffff, 0xe0000000,
1338         0x3c020, 0xffffffff, 0x00010000,
1339         0x3c024, 0xffffffff, 0x00030002,
1340         0x3c028, 0xffffffff, 0x00040007,
1341         0x3c02c, 0xffffffff, 0x00060005,
1342         0x3c030, 0xffffffff, 0x00090008,
1343         0x3c034, 0xffffffff, 0x00010000,
1344         0x3c038, 0xffffffff, 0x00030002,
1345         0x3c03c, 0xffffffff, 0x00040007,
1346         0x3c040, 0xffffffff, 0x00060005,
1347         0x3c044, 0xffffffff, 0x00090008,
1348         0x3c000, 0xffffffff, 0x96e00200,
1349         0x8708, 0xffffffff, 0x00900100,
1350         0xc424, 0xffffffff, 0x0020003f,
1351         0x38, 0xffffffff, 0x0140001c,
1352         0x3c, 0x000f0000, 0x000f0000,
1353         0x220, 0xffffffff, 0xC060000C,
1354         0x224, 0xc0000fff, 0x00000100,
1355         0x20a8, 0xffffffff, 0x00000104,
1356         0x55e4, 0xff000fff, 0x00000100,
1357         0x30cc, 0xc0000fff, 0x00000104,
1358         0xc1e4, 0x00000001, 0x00000001,
1359         0xd00c, 0xff000ff0, 0x00000100,
1360         0xd80c, 0xff000ff0, 0x00000100
1361 };
1362
1363 static const u32 hawaii_golden_spm_registers[] =
1364 {
1365         0x30800, 0xe0ffffff, 0xe0000000
1366 };
1367
1368 static const u32 hawaii_golden_common_registers[] =
1369 {
1370         0x30800, 0xffffffff, 0xe0000000,
1371         0x28350, 0xffffffff, 0x3a00161a,
1372         0x28354, 0xffffffff, 0x0000002e,
1373         0x9a10, 0xffffffff, 0x00018208,
1374         0x98f8, 0xffffffff, 0x12011003
1375 };
1376
1377 static const u32 hawaii_golden_registers[] =
1378 {
1379         0x3354, 0x00000333, 0x00000333,
1380         0x9a10, 0x00010000, 0x00058208,
1381         0x9830, 0xffffffff, 0x00000000,
1382         0x9834, 0xf00fffff, 0x00000400,
1383         0x9838, 0x0002021c, 0x00020200,
1384         0xc78, 0x00000080, 0x00000000,
1385         0x5bb0, 0x000000f0, 0x00000070,
1386         0x5bc0, 0xf0311fff, 0x80300000,
1387         0x350c, 0x00810000, 0x408af000,
1388         0x7030, 0x31000111, 0x00000011,
1389         0x2f48, 0x73773777, 0x12010001,
1390         0x2120, 0x0000007f, 0x0000001b,
1391         0x21dc, 0x00007fb6, 0x00002191,
1392         0x3628, 0x0000003f, 0x0000000a,
1393         0x362c, 0x0000003f, 0x0000000a,
1394         0x2ae4, 0x00073ffe, 0x000022a2,
1395         0x240c, 0x000007ff, 0x00000000,
1396         0x8bf0, 0x00002001, 0x00000001,
1397         0x8b24, 0xffffffff, 0x00ffffff,
1398         0x30a04, 0x0000ff0f, 0x00000000,
1399         0x28a4c, 0x07ffffff, 0x06000000,
1400         0x3e78, 0x00000001, 0x00000002,
1401         0xc768, 0x00000008, 0x00000008,
1402         0xc770, 0x00000f00, 0x00000800,
1403         0xc774, 0x00000f00, 0x00000800,
1404         0xc798, 0x00ffffff, 0x00ff7fbf,
1405         0xc79c, 0x00ffffff, 0x00ff7faf,
1406         0x8c00, 0x000000ff, 0x00000800,
1407         0xe40, 0x00001fff, 0x00001fff,
1408         0x9060, 0x0000007f, 0x00000020,
1409         0x9508, 0x00010000, 0x00010000,
1410         0xae00, 0x00100000, 0x000ff07c,
1411         0xac14, 0x000003ff, 0x0000000f,
1412         0xac10, 0xffffffff, 0x7564fdec,
1413         0xac0c, 0xffffffff, 0x3120b9a8,
1414         0xac08, 0x20000000, 0x0f9c0000
1415 };
1416
1417 static const u32 hawaii_mgcg_cgcg_init[] =
1418 {
1419         0xc420, 0xffffffff, 0xfffffffd,
1420         0x30800, 0xffffffff, 0xe0000000,
1421         0x3c2a0, 0xffffffff, 0x00000100,
1422         0x3c208, 0xffffffff, 0x00000100,
1423         0x3c2c0, 0xffffffff, 0x00000100,
1424         0x3c2c8, 0xffffffff, 0x00000100,
1425         0x3c2c4, 0xffffffff, 0x00000100,
1426         0x55e4, 0xffffffff, 0x00200100,
1427         0x3c280, 0xffffffff, 0x00000100,
1428         0x3c214, 0xffffffff, 0x06000100,
1429         0x3c220, 0xffffffff, 0x00000100,
1430         0x3c218, 0xffffffff, 0x06000100,
1431         0x3c204, 0xffffffff, 0x00000100,
1432         0x3c2e0, 0xffffffff, 0x00000100,
1433         0x3c224, 0xffffffff, 0x00000100,
1434         0x3c200, 0xffffffff, 0x00000100,
1435         0x3c230, 0xffffffff, 0x00000100,
1436         0x3c234, 0xffffffff, 0x00000100,
1437         0x3c250, 0xffffffff, 0x00000100,
1438         0x3c254, 0xffffffff, 0x00000100,
1439         0x3c258, 0xffffffff, 0x00000100,
1440         0x3c25c, 0xffffffff, 0x00000100,
1441         0x3c260, 0xffffffff, 0x00000100,
1442         0x3c27c, 0xffffffff, 0x00000100,
1443         0x3c278, 0xffffffff, 0x00000100,
1444         0x3c210, 0xffffffff, 0x06000100,
1445         0x3c290, 0xffffffff, 0x00000100,
1446         0x3c274, 0xffffffff, 0x00000100,
1447         0x3c2b4, 0xffffffff, 0x00000100,
1448         0x3c2b0, 0xffffffff, 0x00000100,
1449         0x3c270, 0xffffffff, 0x00000100,
1450         0x30800, 0xffffffff, 0xe0000000,
1451         0x3c020, 0xffffffff, 0x00010000,
1452         0x3c024, 0xffffffff, 0x00030002,
1453         0x3c028, 0xffffffff, 0x00040007,
1454         0x3c02c, 0xffffffff, 0x00060005,
1455         0x3c030, 0xffffffff, 0x00090008,
1456         0x3c034, 0xffffffff, 0x00010000,
1457         0x3c038, 0xffffffff, 0x00030002,
1458         0x3c03c, 0xffffffff, 0x00040007,
1459         0x3c040, 0xffffffff, 0x00060005,
1460         0x3c044, 0xffffffff, 0x00090008,
1461         0x3c048, 0xffffffff, 0x00010000,
1462         0x3c04c, 0xffffffff, 0x00030002,
1463         0x3c050, 0xffffffff, 0x00040007,
1464         0x3c054, 0xffffffff, 0x00060005,
1465         0x3c058, 0xffffffff, 0x00090008,
1466         0x3c05c, 0xffffffff, 0x00010000,
1467         0x3c060, 0xffffffff, 0x00030002,
1468         0x3c064, 0xffffffff, 0x00040007,
1469         0x3c068, 0xffffffff, 0x00060005,
1470         0x3c06c, 0xffffffff, 0x00090008,
1471         0x3c070, 0xffffffff, 0x00010000,
1472         0x3c074, 0xffffffff, 0x00030002,
1473         0x3c078, 0xffffffff, 0x00040007,
1474         0x3c07c, 0xffffffff, 0x00060005,
1475         0x3c080, 0xffffffff, 0x00090008,
1476         0x3c084, 0xffffffff, 0x00010000,
1477         0x3c088, 0xffffffff, 0x00030002,
1478         0x3c08c, 0xffffffff, 0x00040007,
1479         0x3c090, 0xffffffff, 0x00060005,
1480         0x3c094, 0xffffffff, 0x00090008,
1481         0x3c098, 0xffffffff, 0x00010000,
1482         0x3c09c, 0xffffffff, 0x00030002,
1483         0x3c0a0, 0xffffffff, 0x00040007,
1484         0x3c0a4, 0xffffffff, 0x00060005,
1485         0x3c0a8, 0xffffffff, 0x00090008,
1486         0x3c0ac, 0xffffffff, 0x00010000,
1487         0x3c0b0, 0xffffffff, 0x00030002,
1488         0x3c0b4, 0xffffffff, 0x00040007,
1489         0x3c0b8, 0xffffffff, 0x00060005,
1490         0x3c0bc, 0xffffffff, 0x00090008,
1491         0x3c0c0, 0xffffffff, 0x00010000,
1492         0x3c0c4, 0xffffffff, 0x00030002,
1493         0x3c0c8, 0xffffffff, 0x00040007,
1494         0x3c0cc, 0xffffffff, 0x00060005,
1495         0x3c0d0, 0xffffffff, 0x00090008,
1496         0x3c0d4, 0xffffffff, 0x00010000,
1497         0x3c0d8, 0xffffffff, 0x00030002,
1498         0x3c0dc, 0xffffffff, 0x00040007,
1499         0x3c0e0, 0xffffffff, 0x00060005,
1500         0x3c0e4, 0xffffffff, 0x00090008,
1501         0x3c0e8, 0xffffffff, 0x00010000,
1502         0x3c0ec, 0xffffffff, 0x00030002,
1503         0x3c0f0, 0xffffffff, 0x00040007,
1504         0x3c0f4, 0xffffffff, 0x00060005,
1505         0x3c0f8, 0xffffffff, 0x00090008,
1506         0xc318, 0xffffffff, 0x00020200,
1507         0x3350, 0xffffffff, 0x00000200,
1508         0x15c0, 0xffffffff, 0x00000400,
1509         0x55e8, 0xffffffff, 0x00000000,
1510         0x2f50, 0xffffffff, 0x00000902,
1511         0x3c000, 0xffffffff, 0x96940200,
1512         0x8708, 0xffffffff, 0x00900100,
1513         0xc424, 0xffffffff, 0x0020003f,
1514         0x38, 0xffffffff, 0x0140001c,
1515         0x3c, 0x000f0000, 0x000f0000,
1516         0x220, 0xffffffff, 0xc060000c,
1517         0x224, 0xc0000fff, 0x00000100,
1518         0xf90, 0xffffffff, 0x00000100,
1519         0xf98, 0x00000101, 0x00000000,
1520         0x20a8, 0xffffffff, 0x00000104,
1521         0x55e4, 0xff000fff, 0x00000100,
1522         0x30cc, 0xc0000fff, 0x00000104,
1523         0xc1e4, 0x00000001, 0x00000001,
1524         0xd00c, 0xff000ff0, 0x00000100,
1525         0xd80c, 0xff000ff0, 0x00000100
1526 };
1527
1528 static const u32 godavari_golden_registers[] =
1529 {
1530         0x55e4, 0xff607fff, 0xfc000100,
1531         0x6ed8, 0x00010101, 0x00010000,
1532         0x9830, 0xffffffff, 0x00000000,
1533         0x98302, 0xf00fffff, 0x00000400,
1534         0x6130, 0xffffffff, 0x00010000,
1535         0x5bb0, 0x000000f0, 0x00000070,
1536         0x5bc0, 0xf0311fff, 0x80300000,
1537         0x98f8, 0x73773777, 0x12010001,
1538         0x98fc, 0xffffffff, 0x00000010,
1539         0x8030, 0x00001f0f, 0x0000100a,
1540         0x2f48, 0x73773777, 0x12010001,
1541         0x2408, 0x000fffff, 0x000c007f,
1542         0x8a14, 0xf000003f, 0x00000007,
1543         0x8b24, 0xffffffff, 0x00ff0fff,
1544         0x30a04, 0x0000ff0f, 0x00000000,
1545         0x28a4c, 0x07ffffff, 0x06000000,
1546         0x4d8, 0x00000fff, 0x00000100,
1547         0xd014, 0x00010000, 0x00810001,
1548         0xd814, 0x00010000, 0x00810001,
1549         0x3e78, 0x00000001, 0x00000002,
1550         0xc768, 0x00000008, 0x00000008,
1551         0xc770, 0x00000f00, 0x00000800,
1552         0xc774, 0x00000f00, 0x00000800,
1553         0xc798, 0x00ffffff, 0x00ff7fbf,
1554         0xc79c, 0x00ffffff, 0x00ff7faf,
1555         0x8c00, 0x000000ff, 0x00000001,
1556         0x214f8, 0x01ff01ff, 0x00000002,
1557         0x21498, 0x007ff800, 0x00200000,
1558         0x2015c, 0xffffffff, 0x00000f40,
1559         0x88c4, 0x001f3ae3, 0x00000082,
1560         0x88d4, 0x0000001f, 0x00000010,
1561         0x30934, 0xffffffff, 0x00000000
1562 };
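/*
 * Each golden-register table above is a flat array of {reg, and_mask,
 * or_value} triples consumed by radeon_program_register_sequence(): with an
 * all-ones mask the value is written as-is, otherwise it is merged into the
 * current register contents (see radeon_device.c for the exact semantics).
 */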
1563
1564
1565 static void cik_init_golden_registers(struct radeon_device *rdev)
1566 {
1567         /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1568         mutex_lock(&rdev->grbm_idx_mutex);
1569         switch (rdev->family) {
1570         case CHIP_BONAIRE:
1571                 radeon_program_register_sequence(rdev,
1572                                                  bonaire_mgcg_cgcg_init,
1573                                                  (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1574                 radeon_program_register_sequence(rdev,
1575                                                  bonaire_golden_registers,
1576                                                  (const u32)ARRAY_SIZE(bonaire_golden_registers));
1577                 radeon_program_register_sequence(rdev,
1578                                                  bonaire_golden_common_registers,
1579                                                  (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1580                 radeon_program_register_sequence(rdev,
1581                                                  bonaire_golden_spm_registers,
1582                                                  (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1583                 break;
1584         case CHIP_KABINI:
1585                 radeon_program_register_sequence(rdev,
1586                                                  kalindi_mgcg_cgcg_init,
1587                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1588                 radeon_program_register_sequence(rdev,
1589                                                  kalindi_golden_registers,
1590                                                  (const u32)ARRAY_SIZE(kalindi_golden_registers));
1591                 radeon_program_register_sequence(rdev,
1592                                                  kalindi_golden_common_registers,
1593                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1594                 radeon_program_register_sequence(rdev,
1595                                                  kalindi_golden_spm_registers,
1596                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1597                 break;
1598         case CHIP_MULLINS:
1599                 radeon_program_register_sequence(rdev,
1600                                                  kalindi_mgcg_cgcg_init,
1601                                                  (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1602                 radeon_program_register_sequence(rdev,
1603                                                  godavari_golden_registers,
1604                                                  (const u32)ARRAY_SIZE(godavari_golden_registers));
1605                 radeon_program_register_sequence(rdev,
1606                                                  kalindi_golden_common_registers,
1607                                                  (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1608                 radeon_program_register_sequence(rdev,
1609                                                  kalindi_golden_spm_registers,
1610                                                  (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1611                 break;
1612         case CHIP_KAVERI:
1613                 radeon_program_register_sequence(rdev,
1614                                                  spectre_mgcg_cgcg_init,
1615                                                  (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1616                 radeon_program_register_sequence(rdev,
1617                                                  spectre_golden_registers,
1618                                                  (const u32)ARRAY_SIZE(spectre_golden_registers));
1619                 radeon_program_register_sequence(rdev,
1620                                                  spectre_golden_common_registers,
1621                                                  (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1622                 radeon_program_register_sequence(rdev,
1623                                                  spectre_golden_spm_registers,
1624                                                  (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1625                 break;
1626         case CHIP_HAWAII:
1627                 radeon_program_register_sequence(rdev,
1628                                                  hawaii_mgcg_cgcg_init,
1629                                                  (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1630                 radeon_program_register_sequence(rdev,
1631                                                  hawaii_golden_registers,
1632                                                  (const u32)ARRAY_SIZE(hawaii_golden_registers));
1633                 radeon_program_register_sequence(rdev,
1634                                                  hawaii_golden_common_registers,
1635                                                  (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1636                 radeon_program_register_sequence(rdev,
1637                                                  hawaii_golden_spm_registers,
1638                                                  (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1639                 break;
1640         default:
1641                 break;
1642         }
1643         mutex_unlock(&rdev->grbm_idx_mutex);
1644 }
1645
1646 /**
1647  * cik_get_xclk - get the xclk
1648  *
1649  * @rdev: radeon_device pointer
1650  *
1651  * Returns the reference clock used by the gfx engine
1652  * (CIK).
1653  */
1654 u32 cik_get_xclk(struct radeon_device *rdev)
1655 {
1656         u32 reference_clock = rdev->clock.spll.reference_freq;
1657
1658         if (rdev->flags & RADEON_IS_IGP) {
1659                 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1660                         return reference_clock / 2;
1661         } else {
1662                 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1663                         return reference_clock / 4;
1664         }
1665         return reference_clock;
1666 }
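/*
 * Illustrative sketch (not driver code): callers commonly use the xclk to
 * turn GPU counter deltas into time.  Assuming the usual 10 kHz units of
 * spll.reference_freq, that might look like:
 *
 *	u32 xclk = cik_get_xclk(rdev);
 *	u64 usecs = div64_u64((u64)counter_delta * 100, xclk);
 *
 * where counter_delta is a hypothetical example variable.
 */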
1667
1668 /**
1669  * cik_mm_rdoorbell - read a doorbell dword
1670  *
1671  * @rdev: radeon_device pointer
1672  * @index: doorbell index
1673  *
1674  * Returns the value in the doorbell aperture at the
1675  * requested doorbell index (CIK).
1676  */
1677 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1678 {
1679         if (index < rdev->doorbell.num_doorbells) {
1680                 return readl(rdev->doorbell.ptr + index);
1681         } else {
1682                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1683                 return 0;
1684         }
1685 }
1686
1687 /**
1688  * cik_mm_wdoorbell - write a doorbell dword
1689  *
1690  * @rdev: radeon_device pointer
1691  * @index: doorbell index
1692  * @v: value to write
1693  *
1694  * Writes @v to the doorbell aperture at the
1695  * requested doorbell index (CIK).
1696  */
1697 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1698 {
1699         if (index < rdev->doorbell.num_doorbells) {
1700                 writel(v, rdev->doorbell.ptr + index);
1701         } else {
1702                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1703         }
1704 }
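/*
 * Usage sketch (illustrative only): a compute ring that owns doorbell slot
 * ring->doorbell_index would typically publish its write pointer with
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
 *
 * and read it back with cik_mm_rdoorbell(rdev, ring->doorbell_index);
 * out-of-range indices are rejected by the bounds checks above.
 */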
1705
1706 #define BONAIRE_IO_MC_REGS_SIZE 36
1707
1708 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1709 {
1710         {0x00000070, 0x04400000},
1711         {0x00000071, 0x80c01803},
1712         {0x00000072, 0x00004004},
1713         {0x00000073, 0x00000100},
1714         {0x00000074, 0x00ff0000},
1715         {0x00000075, 0x34000000},
1716         {0x00000076, 0x08000014},
1717         {0x00000077, 0x00cc08ec},
1718         {0x00000078, 0x00000400},
1719         {0x00000079, 0x00000000},
1720         {0x0000007a, 0x04090000},
1721         {0x0000007c, 0x00000000},
1722         {0x0000007e, 0x4408a8e8},
1723         {0x0000007f, 0x00000304},
1724         {0x00000080, 0x00000000},
1725         {0x00000082, 0x00000001},
1726         {0x00000083, 0x00000002},
1727         {0x00000084, 0xf3e4f400},
1728         {0x00000085, 0x052024e3},
1729         {0x00000087, 0x00000000},
1730         {0x00000088, 0x01000000},
1731         {0x0000008a, 0x1c0a0000},
1732         {0x0000008b, 0xff010000},
1733         {0x0000008d, 0xffffefff},
1734         {0x0000008e, 0xfff3efff},
1735         {0x0000008f, 0xfff3efbf},
1736         {0x00000092, 0xf7ffffff},
1737         {0x00000093, 0xffffff7f},
1738         {0x00000095, 0x00101101},
1739         {0x00000096, 0x00000fff},
1740         {0x00000097, 0x00116fff},
1741         {0x00000098, 0x60010000},
1742         {0x00000099, 0x10010000},
1743         {0x0000009a, 0x00006000},
1744         {0x0000009b, 0x00001000},
1745         {0x0000009f, 0x00b48000}
1746 };
1747
1748 #define HAWAII_IO_MC_REGS_SIZE 22
1749
1750 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1751 {
1752         {0x0000007d, 0x40000000},
1753         {0x0000007e, 0x40180304},
1754         {0x0000007f, 0x0000ff00},
1755         {0x00000081, 0x00000000},
1756         {0x00000083, 0x00000800},
1757         {0x00000086, 0x00000000},
1758         {0x00000087, 0x00000100},
1759         {0x00000088, 0x00020100},
1760         {0x00000089, 0x00000000},
1761         {0x0000008b, 0x00040000},
1762         {0x0000008c, 0x00000100},
1763         {0x0000008e, 0xff010000},
1764         {0x00000090, 0xffffefff},
1765         {0x00000091, 0xfff3efff},
1766         {0x00000092, 0xfff3efbf},
1767         {0x00000093, 0xf7ffffff},
1768         {0x00000094, 0xffffff7f},
1769         {0x00000095, 0x00000fff},
1770         {0x00000096, 0x00116fff},
1771         {0x00000097, 0x60010000},
1772         {0x00000098, 0x10010000},
1773         {0x0000009f, 0x00c79000}
1774 };
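/*
 * Each row of the *_io_mc_regs tables above is an {index, data} pair: the
 * first word is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA (see ci_mc_load_microcode() below).  The newer
 * packaged firmware images carry an equivalent io_debug array of their own,
 * so these static tables are only used with the legacy images.
 */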
1775
1776
1777 /**
1778  * cik_srbm_select - select specific register instances
1779  *
1780  * @rdev: radeon_device pointer
1781  * @me: selected ME (micro engine)
1782  * @pipe: pipe
1783  * @queue: queue
1784  * @vmid: VMID
1785  *
1786  * Switches the currently active register instances.  Some
1787  * registers are instanced per VMID, others are instanced per
1788  * me/pipe/queue combination.
1789  */
1790 static void cik_srbm_select(struct radeon_device *rdev,
1791                             u32 me, u32 pipe, u32 queue, u32 vmid)
1792 {
1793         u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1794                              MEID(me & 0x3) |
1795                              VMID(vmid & 0xf) |
1796                              QUEUEID(queue & 0x7));
1797         WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1798 }
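/*
 * Typical usage sketch (illustrative only): select an instance, program the
 * instanced registers, then restore the default selection, all while holding
 * rdev->srbm_mutex, e.g.
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program per-queue or per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */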
1799
1800 /* ucode loading */
1801 /**
1802  * ci_mc_load_microcode - load MC ucode into the hw
1803  *
1804  * @rdev: radeon_device pointer
1805  *
1806  * Load the GDDR MC ucode into the hw (CIK).
1807  * Returns 0 on success, error on failure.
1808  */
1809 int ci_mc_load_microcode(struct radeon_device *rdev)
1810 {
1811         const __be32 *fw_data = NULL;
1812         const __le32 *new_fw_data = NULL;
1813         u32 running, blackout = 0, tmp;
1814         u32 *io_mc_regs = NULL;
1815         const __le32 *new_io_mc_regs = NULL;
1816         int i, regs_size, ucode_size;
1817
1818         if (!rdev->mc_fw)
1819                 return -EINVAL;
1820
1821         if (rdev->new_fw) {
1822                 const struct mc_firmware_header_v1_0 *hdr =
1823                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1824
1825                 radeon_ucode_print_mc_hdr(&hdr->header);
1826
1827                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1828                 new_io_mc_regs = (const __le32 *)
1829                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1830                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1831                 new_fw_data = (const __le32 *)
1832                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1833         } else {
1834                 ucode_size = rdev->mc_fw->size / 4;
1835
1836                 switch (rdev->family) {
1837                 case CHIP_BONAIRE:
1838                         io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1839                         regs_size = BONAIRE_IO_MC_REGS_SIZE;
1840                         break;
1841                 case CHIP_HAWAII:
1842                         io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1843                         regs_size = HAWAII_IO_MC_REGS_SIZE;
1844                         break;
1845                 default:
1846                         return -EINVAL;
1847                 }
1848                 fw_data = (const __be32 *)rdev->mc_fw->data;
1849         }
1850
1851         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1852
1853         if (running == 0) {
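                /*
                 * Note: running is known to be zero in this branch, so the
                 * MC_SHARED_BLACKOUT_CNTL save/restore guarded by
                 * "if (running)" below can never execute (dead code kept
                 * from the original sequence).
                 */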
1854                 if (running) {
1855                         blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1856                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1857                 }
1858
1859                 /* reset the engine and set to writable */
1860                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1861                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1862
1863                 /* load mc io regs */
1864                 for (i = 0; i < regs_size; i++) {
1865                         if (rdev->new_fw) {
1866                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1867                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1868                         } else {
1869                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1870                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1871                         }
1872                 }
1873
1874                 tmp = RREG32(MC_SEQ_MISC0);
1875                 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1876                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1877                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1878                         WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1879                         WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1880                 }
1881
1882                 /* load the MC ucode */
1883                 for (i = 0; i < ucode_size; i++) {
1884                         if (rdev->new_fw)
1885                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1886                         else
1887                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1888                 }
1889
1890                 /* put the engine back into the active state */
1891                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1892                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1893                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1894
1895                 /* wait for training to complete */
1896                 for (i = 0; i < rdev->usec_timeout; i++) {
1897                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1898                                 break;
1899                         udelay(1);
1900                 }
1901                 for (i = 0; i < rdev->usec_timeout; i++) {
1902                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1903                                 break;
1904                         udelay(1);
1905                 }
1906
1907                 if (running)
1908                         WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1909         }
1910
1911         return 0;
1912 }
1913
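/*
 * Firmware naming sketch (illustrative): cik_init_microcode() below prefers
 * the newer lowercase images, which carry a header validated by
 * radeon_ucode_validate(), and only falls back to the legacy uppercase
 * names, e.g. for Bonaire:
 *
 *	radeon/bonaire_pfp.bin   - preferred, header-validated
 *	radeon/BONAIRE_pfp.bin   - legacy fallback, length-checked only
 *
 * Mixing new and legacy images across the required set is rejected.
 */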
1914 /**
1915  * cik_init_microcode - load ucode images from disk
1916  *
1917  * @rdev: radeon_device pointer
1918  *
1919  * Use the firmware interface to load the ucode images into
1920  * the driver (not loaded into hw).
1921  * Returns 0 on success, error on failure.
1922  */
1923 static int cik_init_microcode(struct radeon_device *rdev)
1924 {
1925         const char *chip_name;
1926         const char *new_chip_name;
1927         size_t pfp_req_size, me_req_size, ce_req_size,
1928                 mec_req_size, rlc_req_size, mc_req_size = 0,
1929                 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1930         char fw_name[30];
1931         int new_fw = 0;
1932         int err;
1933         int num_fw;
1934
1935         DRM_DEBUG("\n");
1936
1937         switch (rdev->family) {
1938         case CHIP_BONAIRE:
1939                 chip_name = "BONAIRE";
1940                 new_chip_name = "bonaire";
1941                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1942                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1943                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1944                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1945                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1946                 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1947                 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1948                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1949                 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1950                 num_fw = 8;
1951                 break;
1952         case CHIP_HAWAII:
1953                 chip_name = "HAWAII";
1954                 new_chip_name = "hawaii";
1955                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1956                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1957                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1958                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1959                 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1960                 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1961                 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1962                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1963                 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1964                 num_fw = 8;
1965                 break;
1966         case CHIP_KAVERI:
1967                 chip_name = "KAVERI";
1968                 new_chip_name = "kaveri";
1969                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1970                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1971                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1972                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1973                 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1974                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1975                 num_fw = 7;
1976                 break;
1977         case CHIP_KABINI:
1978                 chip_name = "KABINI";
1979                 new_chip_name = "kabini";
1980                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1981                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1982                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1983                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1984                 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1985                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1986                 num_fw = 6;
1987                 break;
1988         case CHIP_MULLINS:
1989                 chip_name = "MULLINS";
1990                 new_chip_name = "mullins";
1991                 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1992                 me_req_size = CIK_ME_UCODE_SIZE * 4;
1993                 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1994                 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1995                 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1996                 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997                 num_fw = 6;
1998                 break;
1999         default: BUG();
2000         }
2001
2002         DRM_INFO("Loading %s Microcode\n", new_chip_name);
2003
2004         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2005         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2006         if (err) {
2007                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2008                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2009                 if (err)
2010                         goto out;
2011                 if (rdev->pfp_fw->size != pfp_req_size) {
2012                         printk(KERN_ERR
2013                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2014                                rdev->pfp_fw->size, fw_name);
2015                         err = -EINVAL;
2016                         goto out;
2017                 }
2018         } else {
2019                 err = radeon_ucode_validate(rdev->pfp_fw);
2020                 if (err) {
2021                         printk(KERN_ERR
2022                                "cik_fw: validation failed for firmware \"%s\"\n",
2023                                fw_name);
2024                         goto out;
2025                 } else {
2026                         new_fw++;
2027                 }
2028         }
2029
2030         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2031         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2032         if (err) {
2033                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2034                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2035                 if (err)
2036                         goto out;
2037                 if (rdev->me_fw->size != me_req_size) {
2038                         printk(KERN_ERR
2039                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2040                                rdev->me_fw->size, fw_name);
2041                         err = -EINVAL;
                        goto out;
2042                 }
2043         } else {
2044                 err = radeon_ucode_validate(rdev->me_fw);
2045                 if (err) {
2046                         printk(KERN_ERR
2047                                "cik_fw: validation failed for firmware \"%s\"\n",
2048                                fw_name);
2049                         goto out;
2050                 } else {
2051                         new_fw++;
2052                 }
2053         }
2054
2055         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2056         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2057         if (err) {
2058                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2059                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2060                 if (err)
2061                         goto out;
2062                 if (rdev->ce_fw->size != ce_req_size) {
2063                         printk(KERN_ERR
2064                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2065                                rdev->ce_fw->size, fw_name);
2066                         err = -EINVAL;
                        goto out;
2067                 }
2068         } else {
2069                 err = radeon_ucode_validate(rdev->ce_fw);
2070                 if (err) {
2071                         printk(KERN_ERR
2072                                "cik_fw: validation failed for firmware \"%s\"\n",
2073                                fw_name);
2074                         goto out;
2075                 } else {
2076                         new_fw++;
2077                 }
2078         }
2079
2080         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2081         err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2082         if (err) {
2083                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2084                 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2085                 if (err)
2086                         goto out;
2087                 if (rdev->mec_fw->size != mec_req_size) {
2088                         printk(KERN_ERR
2089                                "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2090                                rdev->mec_fw->size, fw_name);
2091                         err = -EINVAL;
                        goto out;
2092                 }
2093         } else {
2094                 err = radeon_ucode_validate(rdev->mec_fw);
2095                 if (err) {
2096                         printk(KERN_ERR
2097                                "cik_fw: validation failed for firmware \"%s\"\n",
2098                                fw_name);
2099                         goto out;
2100                 } else {
2101                         new_fw++;
2102                 }
2103         }
2104
2105         if (rdev->family == CHIP_KAVERI) {
2106                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2107                 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2108                 if (err) {
2109                         goto out;
2110                 } else {
2111                         err = radeon_ucode_validate(rdev->mec2_fw);
2112                         if (err) {
2113                                 goto out;
2114                         } else {
2115                                 new_fw++;
2116                         }
2117                 }
2118         }
2119
2120         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2121         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2122         if (err) {
2123                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2124                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2125                 if (err)
2126                         goto out;
2127                 if (rdev->rlc_fw->size != rlc_req_size) {
2128                         printk(KERN_ERR
2129                                "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2130                                rdev->rlc_fw->size, fw_name);
2131                         err = -EINVAL;
                        goto out;
2132                 }
2133         } else {
2134                 err = radeon_ucode_validate(rdev->rlc_fw);
2135                 if (err) {
2136                         printk(KERN_ERR
2137                                "cik_fw: validation failed for firmware \"%s\"\n",
2138                                fw_name);
2139                         goto out;
2140                 } else {
2141                         new_fw++;
2142                 }
2143         }
2144
2145         snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2146         err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2147         if (err) {
2148                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2149                 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2150                 if (err)
2151                         goto out;
2152                 if (rdev->sdma_fw->size != sdma_req_size) {
2153                         printk(KERN_ERR
2154                                "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2155                                rdev->sdma_fw->size, fw_name);
2156                         err = -EINVAL;
                        goto out;
2157                 }
2158         } else {
2159                 err = radeon_ucode_validate(rdev->sdma_fw);
2160                 if (err) {
2161                         printk(KERN_ERR
2162                                "cik_fw: validation failed for firmware \"%s\"\n",
2163                                fw_name);
2164                         goto out;
2165                 } else {
2166                         new_fw++;
2167                 }
2168         }
2169
2170         /* No SMC, MC ucode on APUs */
2171         if (!(rdev->flags & RADEON_IS_IGP)) {
2172                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2173                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2174                 if (err) {
2175                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2176                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2177                         if (err) {
2178                                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2179                                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2180                                 if (err)
2181                                         goto out;
2182                         }
2183                         if ((rdev->mc_fw->size != mc_req_size) &&
2184                             (rdev->mc_fw->size != mc2_req_size)) {
2185                                 printk(KERN_ERR
2186                                        "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2187                                        rdev->mc_fw->size, fw_name);
2188                                 err = -EINVAL;
                                goto out;
2189                         }
2190                         DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2191                 } else {
2192                         err = radeon_ucode_validate(rdev->mc_fw);
2193                         if (err) {
2194                                 printk(KERN_ERR
2195                                        "cik_fw: validation failed for firmware \"%s\"\n",
2196                                        fw_name);
2197                                 goto out;
2198                         } else {
2199                                 new_fw++;
2200                         }
2201                 }
2202
2203                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2204                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2205                 if (err) {
2206                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2207                         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2208                         if (err) {
2209                                 printk(KERN_ERR
2210                                        "smc: error loading firmware \"%s\"\n",
2211                                        fw_name);
2212                                 release_firmware(rdev->smc_fw);
2213                                 rdev->smc_fw = NULL;
2214                                 err = 0;
2215                         } else if (rdev->smc_fw->size != smc_req_size) {
2216                                 printk(KERN_ERR
2217                                        "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2218                                        rdev->smc_fw->size, fw_name);
2219                                 err = -EINVAL;
2220                         }
2221                 } else {
2222                         err = radeon_ucode_validate(rdev->smc_fw);
2223                         if (err) {
2224                                 printk(KERN_ERR
2225                                        "cik_fw: validation failed for firmware \"%s\"\n",
2226                                        fw_name);
2227                                 goto out;
2228                         } else {
2229                                 new_fw++;
2230                         }
2231                 }
2232         }
2233
2234         if (new_fw == 0) {
2235                 rdev->new_fw = false;
2236         } else if (new_fw < num_fw) {
2237                 printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2238                 err = -EINVAL;
2239         } else {
2240                 rdev->new_fw = true;
2241         }
2242
2243 out:
2244         if (err) {
2245                 if (err != -EINVAL)
2246                         printk(KERN_ERR
2247                                "cik_cp: Failed to load firmware \"%s\"\n",
2248                                fw_name);
2249                 release_firmware(rdev->pfp_fw);
2250                 rdev->pfp_fw = NULL;
2251                 release_firmware(rdev->me_fw);
2252                 rdev->me_fw = NULL;
2253                 release_firmware(rdev->ce_fw);
2254                 rdev->ce_fw = NULL;
2255                 release_firmware(rdev->mec_fw);
2256                 rdev->mec_fw = NULL;
2257                 release_firmware(rdev->mec2_fw);
2258                 rdev->mec2_fw = NULL;
2259                 release_firmware(rdev->rlc_fw);
2260                 rdev->rlc_fw = NULL;
2261                 release_firmware(rdev->sdma_fw);
2262                 rdev->sdma_fw = NULL;
2263                 release_firmware(rdev->mc_fw);
2264                 rdev->mc_fw = NULL;
2265                 release_firmware(rdev->smc_fw);
2266                 rdev->smc_fw = NULL;
2267         }
2268         return err;
2269 }
2270
2271 /*
2272  * Core functions
2273  */
2274 /**
2275  * cik_tiling_mode_table_init - init the hw tiling table
2276  *
2277  * @rdev: radeon_device pointer
2278  *
2279  * Starting with SI, the tiling setup is done globally in a
2280  * set of 32 tiling modes.  Rather than selecting each set of
2281  * parameters per surface as on older ASICs, we just select
2282  * which index in the tiling table we want to use, and the
2283  * surface uses those parameters (CIK).
2284  */
2285 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2286 {
2287         const u32 num_tile_mode_states = 32;
2288         const u32 num_secondary_tile_mode_states = 16;
2289         u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2290         u32 num_pipe_configs;
2291         u32 num_rbs = rdev->config.cik.max_backends_per_se *
2292                 rdev->config.cik.max_shader_engines;
2293
2294         switch (rdev->config.cik.mem_row_size_in_kb) {
2295         case 1:
2296                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2297                 break;
2298         case 2:
2299         default:
2300                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2301                 break;
2302         case 4:
2303                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2304                 break;
2305         }
2306
2307         num_pipe_configs = rdev->config.cik.max_tile_pipes;
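        /* asics with more than 8 tile pipes (e.g. Hawaii) use the 16-pipe configs */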
2308         if (num_pipe_configs > 8)
2309                 num_pipe_configs = 16;
2310
2311         if (num_pipe_configs == 16) {
2312                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2313                         switch (reg_offset) {
2314                         case 0:
2315                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2316                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2317                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2319                                 break;
2320                         case 1:
2321                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2323                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2325                                 break;
2326                         case 2:
2327                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2329                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2331                                 break;
2332                         case 3:
2333                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2334                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2335                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2337                                 break;
2338                         case 4:
2339                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2341                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                                  TILE_SPLIT(split_equal_to_row_size));
2343                                 break;
2344                         case 5:
2345                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2346                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2347                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348                                 break;
2349                         case 6:
2350                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2353                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354                                 break;
2355                         case 7:
2356                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                                  TILE_SPLIT(split_equal_to_row_size));
2360                                 break;
2361                         case 8:
2362                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2363                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2364                                 break;
2365                         case 9:
2366                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2367                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2369                                 break;
2370                         case 10:
2371                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2372                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2375                                 break;
2376                         case 11:
2377                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2378                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2380                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                                 break;
2382                         case 12:
2383                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2384                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387                                 break;
2388                         case 13:
2389                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2390                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2392                                 break;
2393                         case 14:
2394                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2395                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2397                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2398                                 break;
2399                         case 16:
2400                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2402                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2403                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2404                                 break;
2405                         case 17:
2406                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2407                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2408                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410                                 break;
2411                         case 27:
2412                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2413                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2415                                 break;
2416                         case 28:
2417                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2419                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421                                 break;
2422                         case 29:
2423                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2424                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2426                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                                 break;
2428                         case 30:
2429                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431                                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                                 break;
2434                         default:
2435                                 gb_tile_moden = 0;
2436                                 break;
2437                         }
2438                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2439                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2440                 }
2441                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2442                         switch (reg_offset) {
2443                         case 0:
2444                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2446                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2447                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2448                                 break;
2449                         case 1:
2450                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2452                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2454                                 break;
2455                         case 2:
2456                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2460                                 break;
2461                         case 3:
2462                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2464                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2466                                 break;
2467                         case 4:
2468                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2470                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2471                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2472                                 break;
2473                         case 5:
2474                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2476                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2477                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2478                                 break;
2479                         case 6:
2480                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2484                                 break;
2485                         case 8:
2486                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2489                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2490                                 break;
2491                         case 9:
2492                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2494                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2496                                 break;
2497                         case 10:
2498                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2502                                 break;
2503                         case 11:
2504                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2508                                 break;
2509                         case 12:
2510                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2513                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2514                                 break;
2515                         case 13:
2516                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2520                                 break;
2521                         case 14:
2522                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2523                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2524                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2525                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2526                                 break;
2527                         default:
2528                                 gb_tile_moden = 0;
2529                                 break;
2530                         }
2531                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2532                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2533                 }
2534         } else if (num_pipe_configs == 8) {
2535                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2536                         switch (reg_offset) {
2537                         case 0:
2538                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2539                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2540                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2542                                 break;
2543                         case 1:
2544                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2548                                 break;
2549                         case 2:
2550                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2552                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2554                                 break;
2555                         case 3:
2556                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2557                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2558                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2560                                 break;
2561                         case 4:
2562                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2563                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2564                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                                  TILE_SPLIT(split_equal_to_row_size));
2566                                 break;
2567                         case 5:
2568                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2569                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2570                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2571                                 break;
2572                         case 6:
2573                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2574                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2575                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2577                                 break;
2578                         case 7:
2579                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2580                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2581                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                                  TILE_SPLIT(split_equal_to_row_size));
2583                                 break;
2584                         case 8:
2585                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2586                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2587                                 break;
2588                         case 9:
2589                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2590                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2592                                 break;
2593                         case 10:
2594                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2596                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                                 break;
2599                         case 11:
2600                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2601                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2604                                 break;
2605                         case 12:
2606                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2607                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2608                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2610                                 break;
2611                         case 13:
2612                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2615                                 break;
2616                         case 14:
2617                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2619                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621                                 break;
2622                         case 16:
2623                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2625                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2626                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                                 break;
2628                         case 17:
2629                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2630                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2633                                 break;
2634                         case 27:
2635                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2636                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2637                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2638                                 break;
2639                         case 28:
2640                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2642                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2643                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644                                 break;
2645                         case 29:
2646                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2648                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2649                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                                 break;
2651                         case 30:
2652                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2654                                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2655                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656                                 break;
2657                         default:
2658                                 gb_tile_moden = 0;
2659                                 break;
2660                         }
2661                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2662                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2663                 }
2664                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2665                         switch (reg_offset) {
2666                         case 0:
2667                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2671                                 break;
2672                         case 1:
2673                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2674                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2675                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2676                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2677                                 break;
2678                         case 2:
2679                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2682                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2683                                 break;
2684                         case 3:
2685                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2686                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2687                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2688                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2689                                 break;
2690                         case 4:
2691                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2692                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2693                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2694                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2695                                 break;
2696                         case 5:
2697                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2698                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2699                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2700                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2701                                 break;
2702                         case 6:
2703                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2704                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2705                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2706                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2707                                 break;
2708                         case 8:
2709                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2711                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2713                                 break;
2714                         case 9:
2715                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2719                                 break;
2720                         case 10:
2721                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2723                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2724                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2725                                 break;
2726                         case 11:
2727                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2729                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2730                                                  NUM_BANKS(ADDR_SURF_16_BANK));
2731                                 break;
2732                         case 12:
2733                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2735                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2736                                                  NUM_BANKS(ADDR_SURF_8_BANK));
2737                                 break;
2738                         case 13:
2739                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742                                                  NUM_BANKS(ADDR_SURF_4_BANK));
2743                                 break;
2744                         case 14:
2745                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748                                                  NUM_BANKS(ADDR_SURF_2_BANK));
2749                                 break;
2750                         default:
2751                                 gb_tile_moden = 0;
2752                                 break;
2753                         }
2754                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2755                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2756                 }
2757         } else if (num_pipe_configs == 4) {
2758                 if (num_rbs == 4) {
2759                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2760                                 switch (reg_offset) {
2761                                 case 0:
2762                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2763                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2764                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2766                                         break;
2767                                 case 1:
2768                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2772                                         break;
2773                                 case 2:
2774                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2776                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2778                                         break;
2779                                 case 3:
2780                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2782                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2784                                         break;
2785                                 case 4:
2786                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2788                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789                                                          TILE_SPLIT(split_equal_to_row_size));
2790                                         break;
2791                                 case 5:
2792                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795                                         break;
2796                                 case 6:
2797                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2798                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2799                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2801                                         break;
2802                                 case 7:
2803                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2805                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                                          TILE_SPLIT(split_equal_to_row_size));
2807                                         break;
2808                                 case 8:
2809                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2810                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16));
2811                                         break;
2812                                 case 9:
2813                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2814                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2816                                         break;
2817                                 case 10:
2818                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2819                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2820                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                                         break;
2823                                 case 11:
2824                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2826                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2828                                         break;
2829                                 case 12:
2830                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2831                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2833                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2834                                         break;
2835                                 case 13:
2836                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2837                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2839                                         break;
2840                                 case 14:
2841                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2842                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                                         break;
2846                                 case 16:
2847                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2848                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2849                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2850                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2851                                         break;
2852                                 case 17:
2853                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2854                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2857                                         break;
2858                                 case 27:
2859                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2860                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2861                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2862                                         break;
2863                                 case 28:
2864                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2865                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2866                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2867                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2868                                         break;
2869                                 case 29:
2870                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2871                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2872                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2873                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2874                                         break;
2875                                 case 30:
2876                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2877                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2878                                                          PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2879                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2880                                         break;
2881                                 default:
2882                                         gb_tile_moden = 0;
2883                                         break;
2884                                 }
2885                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2886                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2887                         }
2888                 } else if (num_rbs < 4) {
2889                         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2890                                 switch (reg_offset) {
2891                                 case 0:
2892                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2894                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2895                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2896                                         break;
2897                                 case 1:
2898                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2900                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2901                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2902                                         break;
2903                                 case 2:
2904                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2907                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                                         break;
2909                                 case 3:
2910                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2912                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2913                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2914                                         break;
2915                                 case 4:
2916                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2918                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2919                                                          TILE_SPLIT(split_equal_to_row_size));
2920                                         break;
2921                                 case 5:
2922                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2923                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2924                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                                         break;
2926                                 case 6:
2927                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2929                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2930                                                          TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2931                                         break;
2932                                 case 7:
2933                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2934                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2935                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2936                                                          TILE_SPLIT(split_equal_to_row_size));
2937                                         break;
2938                                 case 8:
2939                                         gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2940                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16));
2941                                         break;
2942                                 case 9:
2943                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2944                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2945                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2946                                         break;
2947                                 case 10:
2948                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2949                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2951                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952                                         break;
2953                                 case 11:
2954                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2955                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2956                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2957                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                                         break;
2959                                 case 12:
2960                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2961                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2962                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2963                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964                                         break;
2965                                 case 13:
2966                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2967                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2968                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2969                                         break;
2970                                 case 14:
2971                                         gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2972                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2974                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2975                                         break;
2976                                 case 16:
2977                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2980                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2981                                         break;
2982                                 case 17:
2983                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2984                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2985                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2986                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2987                                         break;
2988                                 case 27:
2989                                         gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2990                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2991                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2992                                         break;
2993                                 case 28:
2994                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2995                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2996                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2997                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2998                                         break;
2999                                 case 29:
3000                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3001                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3002                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3003                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3004                                         break;
3005                                 case 30:
3006                                         gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3007                                                          MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3008                                                          PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3009                                                          SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                                         break;
3011                                 default:
3012                                         gb_tile_moden = 0;
3013                                         break;
3014                                 }
3015                                 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3016                                 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3017                         }
3018                 }
3019                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3020                         switch (reg_offset) {
3021                         case 0:
3022                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3023                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3024                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3025                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3026                                 break;
3027                         case 1:
3028                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3030                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3032                                 break;
3033                         case 2:
3034                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3036                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3037                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3038                                 break;
3039                         case 3:
3040                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3041                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3042                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3043                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3044                                 break;
3045                         case 4:
3046                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3050                                 break;
3051                         case 5:
3052                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3055                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3056                                 break;
3057                         case 6:
3058                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3062                                 break;
3063                         case 8:
3064                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3065                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3068                                 break;
3069                         case 9:
3070                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3071                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3072                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3073                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3074                                 break;
3075                         case 10:
3076                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3078                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3079                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3080                                 break;
3081                         case 11:
3082                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3084                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3085                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3086                                 break;
3087                         case 12:
3088                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3091                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3092                                 break;
3093                         case 13:
3094                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3097                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3098                                 break;
3099                         case 14:
3100                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3102                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3103                                                  NUM_BANKS(ADDR_SURF_4_BANK));
3104                                 break;
3105                         default:
3106                                 gb_tile_moden = 0;
3107                                 break;
3108                         }
3109                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3110                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3111                 }
3112         } else if (num_pipe_configs == 2) {
3113                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3114                         switch (reg_offset) {
3115                         case 0:
3116                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3118                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3119                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3120                                 break;
3121                         case 1:
3122                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3124                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3126                                 break;
3127                         case 2:
3128                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3129                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3130                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3131                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3132                                 break;
3133                         case 3:
3134                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3136                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3138                                 break;
3139                         case 4:
3140                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3142                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                                  TILE_SPLIT(split_equal_to_row_size));
3144                                 break;
3145                         case 5:
3146                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3147                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3148                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3149                                 break;
3150                         case 6:
3151                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3152                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3153                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3155                                 break;
3156                         case 7:
3157                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3158                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3159                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3160                                                  TILE_SPLIT(split_equal_to_row_size));
3161                                 break;
3162                         case 8:
3163                                 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3164                                                  PIPE_CONFIG(ADDR_SURF_P2));
3165                                 break;
3166                         case 9:
3167                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3168                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3169                                                  PIPE_CONFIG(ADDR_SURF_P2));
3170                                 break;
3171                         case 10:
3172                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3173                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3174                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3176                                 break;
3177                         case 11:
3178                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3179                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3180                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3181                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3182                                 break;
3183                         case 12:
3184                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3185                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3186                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3188                                 break;
3189                         case 13:
3190                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3192                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3193                                 break;
3194                         case 14:
3195                                 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3196                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3197                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199                                 break;
3200                         case 16:
3201                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3202                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3203                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3204                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205                                 break;
3206                         case 17:
3207                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3208                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3209                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3210                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3211                                 break;
3212                         case 27:
3213                                 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3214                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3215                                                  PIPE_CONFIG(ADDR_SURF_P2));
3216                                 break;
3217                         case 28:
3218                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3219                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3220                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3222                                 break;
3223                         case 29:
3224                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3226                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3227                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3228                                 break;
3229                         case 30:
3230                                 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3231                                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3232                                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3234                                 break;
3235                         default:
3236                                 gb_tile_moden = 0;
3237                                 break;
3238                         }
3239                         rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3240                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3241                 }
3242                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3243                         switch (reg_offset) {
3244                         case 0:
3245                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                                 break;
3250                         case 1:
3251                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3255                                 break;
3256                         case 2:
3257                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3259                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3261                                 break;
3262                         case 3:
3263                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3266                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3267                                 break;
3268                         case 4:
3269                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3270                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3271                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3272                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3273                                 break;
3274                         case 5:
3275                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3276                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3277                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3278                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3279                                 break;
3280                         case 6:
3281                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3282                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3283                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3284                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3285                                 break;
3286                         case 8:
3287                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3288                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3289                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3290                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3291                                 break;
3292                         case 9:
3293                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3294                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3295                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3297                                 break;
3298                         case 10:
3299                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3300                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3301                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3302                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3303                                 break;
3304                         case 11:
3305                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3306                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3307                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3308                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3309                                 break;
3310                         case 12:
3311                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3312                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3313                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3314                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3315                                 break;
3316                         case 13:
3317                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3318                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3319                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320                                                  NUM_BANKS(ADDR_SURF_16_BANK));
3321                                 break;
3322                         case 14:
3323                                 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3324                                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3325                                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3326                                                  NUM_BANKS(ADDR_SURF_8_BANK));
3327                                 break;
3328                         default:
3329                                 gb_tile_moden = 0;
3330                                 break;
3331                         }
3332                         rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3333                         WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3334                 }
3335         } else
3336                 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3337 }
3338
3339 /**
3340  * cik_select_se_sh - select which SE, SH to address
3341  *
3342  * @rdev: radeon_device pointer
3343  * @se_num: shader engine to address
3344  * @sh_num: sh block to address
3345  *
3346  * Select which SE, SH combinations to address. Certain
3347  * registers are instanced per SE or SH.  0xffffffff means
3348  * broadcast to all SEs or SHs (CIK).
3349  */
3350 static void cik_select_se_sh(struct radeon_device *rdev,
3351                              u32 se_num, u32 sh_num)
3352 {
3353         u32 data = INSTANCE_BROADCAST_WRITES;
3354
3355         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3356                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3357         else if (se_num == 0xffffffff)
3358                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3359         else if (sh_num == 0xffffffff)
3360                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3361         else
3362                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3363         WREG32(GRBM_GFX_INDEX, data);
3364 }
3365
3366 /**
3367  * cik_create_bitmask - create a bitmask
3368  *
3369  * @bit_width: length of the mask
3370  *
3371  * create a variable length bit mask (CIK).
3372  * Returns the bitmask.
3373  */
3374 static u32 cik_create_bitmask(u32 bit_width)
3375 {
3376         u32 i, mask = 0;
3377
3378         for (i = 0; i < bit_width; i++) {
3379                 mask <<= 1;
3380                 mask |= 1;
3381         }
3382         return mask;
3383 }
3384
3385 /**
3386  * cik_get_rb_disabled - computes the mask of disabled RBs
3387  *
3388  * @rdev: radeon_device pointer
3389  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3390  * @sh_per_se: number of SH blocks per SE for the asic
3392  *
3393  * Calculates the bitmask of disabled RBs (CIK).
3394  * Returns the disabled RB bitmask.
3395  */
3396 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3397                               u32 max_rb_num_per_se,
3398                               u32 sh_per_se)
3399 {
3400         u32 data, mask;
3401
3402         data = RREG32(CC_RB_BACKEND_DISABLE);
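        /* CC_RB_BACKEND_DISABLE reflects RBs fused off in hardware,
         * GC_USER_RB_BACKEND_DISABLE those disabled by software; combine
         * both to get the effective disable mask for this SE/SH. */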
3403         if (data & 1)
3404                 data &= BACKEND_DISABLE_MASK;
3405         else
3406                 data = 0;
3407         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3408
3409         data >>= BACKEND_DISABLE_SHIFT;
3410
3411         mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3412
3413         return data & mask;
3414 }
3415
3416 /**
3417  * cik_setup_rb - setup the RBs on the asic
3418  *
3419  * @rdev: radeon_device pointer
3420  * @se_num: number of SEs (shader engines) for the asic
3421  * @sh_per_se: number of SH blocks per SE for the asic
3422  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3423  *
3424  * Configures per-SE/SH RB registers (CIK).
3425  */
3426 static void cik_setup_rb(struct radeon_device *rdev,
3427                          u32 se_num, u32 sh_per_se,
3428                          u32 max_rb_num_per_se)
3429 {
3430         int i, j;
3431         u32 data, mask;
3432         u32 disabled_rbs = 0;
3433         u32 enabled_rbs = 0;
3434
3435         mutex_lock(&rdev->grbm_idx_mutex);
3436         for (i = 0; i < se_num; i++) {
3437                 for (j = 0; j < sh_per_se; j++) {
3438                         cik_select_se_sh(rdev, i, j);
3439                         data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3440                         if (rdev->family == CHIP_HAWAII)
3441                                 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3442                         else
3443                                 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3444                 }
3445         }
3446         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3447         mutex_unlock(&rdev->grbm_idx_mutex);
3448
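        /* fold the per-SE/SH disable bits collected above into a single
         * global mask of enabled RBs */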
3449         mask = 1;
3450         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3451                 if (!(disabled_rbs & mask))
3452                         enabled_rbs |= mask;
3453                 mask <<= 1;
3454         }
3455
3456         rdev->config.cik.backend_enable_mask = enabled_rbs;
3457
3458         mutex_lock(&rdev->grbm_idx_mutex);
3459         for (i = 0; i < se_num; i++) {
3460                 cik_select_se_sh(rdev, i, 0xffffffff);
3461                 data = 0;
3462                 for (j = 0; j < sh_per_se; j++) {
3463                         switch (enabled_rbs & 3) {
3464                         case 0:
3465                                 if (j == 0)
3466                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3467                                 else
3468                                         data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3469                                 break;
3470                         case 1:
3471                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3472                                 break;
3473                         case 2:
3474                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3475                                 break;
3476                         case 3:
3477                         default:
3478                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3479                                 break;
3480                         }
3481                         enabled_rbs >>= 2;
3482                 }
3483                 WREG32(PA_SC_RASTER_CONFIG, data);
3484         }
3485         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3486         mutex_unlock(&rdev->grbm_idx_mutex);
3487 }
3488
3489 /**
3490  * cik_gpu_init - setup the 3D engine
3491  *
3492  * @rdev: radeon_device pointer
3493  *
3494  * Configures the 3D engine and tiling configuration
3495  * registers so that the 3D engine is usable.
3496  */
3497 static void cik_gpu_init(struct radeon_device *rdev)
3498 {
3499         u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3500         u32 mc_shared_chmap, mc_arb_ramcfg;
3501         u32 hdp_host_path_cntl;
3502         u32 tmp;
3503         int i, j;
3504
3505         switch (rdev->family) {
3506         case CHIP_BONAIRE:
3507                 rdev->config.cik.max_shader_engines = 2;
3508                 rdev->config.cik.max_tile_pipes = 4;
3509                 rdev->config.cik.max_cu_per_sh = 7;
3510                 rdev->config.cik.max_sh_per_se = 1;
3511                 rdev->config.cik.max_backends_per_se = 2;
3512                 rdev->config.cik.max_texture_channel_caches = 4;
3513                 rdev->config.cik.max_gprs = 256;
3514                 rdev->config.cik.max_gs_threads = 32;
3515                 rdev->config.cik.max_hw_contexts = 8;
3516
3517                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3518                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3519                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3520                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3521                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3522                 break;
3523         case CHIP_HAWAII:
3524                 rdev->config.cik.max_shader_engines = 4;
3525                 rdev->config.cik.max_tile_pipes = 16;
3526                 rdev->config.cik.max_cu_per_sh = 11;
3527                 rdev->config.cik.max_sh_per_se = 1;
3528                 rdev->config.cik.max_backends_per_se = 4;
3529                 rdev->config.cik.max_texture_channel_caches = 16;
3530                 rdev->config.cik.max_gprs = 256;
3531                 rdev->config.cik.max_gs_threads = 32;
3532                 rdev->config.cik.max_hw_contexts = 8;
3533
3534                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3535                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3536                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3537                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3538                 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3539                 break;
3540         case CHIP_KAVERI:
3541                 rdev->config.cik.max_shader_engines = 1;
3542                 rdev->config.cik.max_tile_pipes = 4;
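                /* Kaveri ships in several SKUs with different CU and RB
                 * counts, so key the shader config off the PCI device id */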
3543                 if ((rdev->pdev->device == 0x1304) ||
3544                     (rdev->pdev->device == 0x1305) ||
3545                     (rdev->pdev->device == 0x130C) ||
3546                     (rdev->pdev->device == 0x130F) ||
3547                     (rdev->pdev->device == 0x1310) ||
3548                     (rdev->pdev->device == 0x1311) ||
3549                     (rdev->pdev->device == 0x131C)) {
3550                         rdev->config.cik.max_cu_per_sh = 8;
3551                         rdev->config.cik.max_backends_per_se = 2;
3552                 } else if ((rdev->pdev->device == 0x1309) ||
3553                            (rdev->pdev->device == 0x130A) ||
3554                            (rdev->pdev->device == 0x130D) ||
3555                            (rdev->pdev->device == 0x1313) ||
3556                            (rdev->pdev->device == 0x131D)) {
3557                         rdev->config.cik.max_cu_per_sh = 6;
3558                         rdev->config.cik.max_backends_per_se = 2;
3559                 } else if ((rdev->pdev->device == 0x1306) ||
3560                            (rdev->pdev->device == 0x1307) ||
3561                            (rdev->pdev->device == 0x130B) ||
3562                            (rdev->pdev->device == 0x130E) ||
3563                            (rdev->pdev->device == 0x1315) ||
3564                            (rdev->pdev->device == 0x1318) ||
3565                            (rdev->pdev->device == 0x131B)) {
3566                         rdev->config.cik.max_cu_per_sh = 4;
3567                         rdev->config.cik.max_backends_per_se = 1;
3568                 } else {
3569                         rdev->config.cik.max_cu_per_sh = 3;
3570                         rdev->config.cik.max_backends_per_se = 1;
3571                 }
3572                 rdev->config.cik.max_sh_per_se = 1;
3573                 rdev->config.cik.max_texture_channel_caches = 4;
3574                 rdev->config.cik.max_gprs = 256;
3575                 rdev->config.cik.max_gs_threads = 16;
3576                 rdev->config.cik.max_hw_contexts = 8;
3577
3578                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3579                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3580                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3581                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3582                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3583                 break;
3584         case CHIP_KABINI:
3585         case CHIP_MULLINS:
3586         default:
3587                 rdev->config.cik.max_shader_engines = 1;
3588                 rdev->config.cik.max_tile_pipes = 2;
3589                 rdev->config.cik.max_cu_per_sh = 2;
3590                 rdev->config.cik.max_sh_per_se = 1;
3591                 rdev->config.cik.max_backends_per_se = 1;
3592                 rdev->config.cik.max_texture_channel_caches = 2;
3593                 rdev->config.cik.max_gprs = 256;
3594                 rdev->config.cik.max_gs_threads = 16;
3595                 rdev->config.cik.max_hw_contexts = 8;
3596
3597                 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3598                 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3599                 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3600                 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3601                 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3602                 break;
3603         }
3604
3605         /* Initialize HDP */
3606         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3607                 WREG32((0x2c14 + j), 0x00000000);
3608                 WREG32((0x2c18 + j), 0x00000000);
3609                 WREG32((0x2c1c + j), 0x00000000);
3610                 WREG32((0x2c20 + j), 0x00000000);
3611                 WREG32((0x2c24 + j), 0x00000000);
3612         }
3613
3614         WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3615
3616         WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3617
3618         mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3619         mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3620
3621         rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3622         rdev->config.cik.mem_max_burst_length_bytes = 256;
3623         tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3624         rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3625         if (rdev->config.cik.mem_row_size_in_kb > 4)
3626                 rdev->config.cik.mem_row_size_in_kb = 4;
3627         /* XXX use MC settings? */
3628         rdev->config.cik.shader_engine_tile_size = 32;
3629         rdev->config.cik.num_gpus = 1;
3630         rdev->config.cik.multi_gpu_tile_size = 64;
3631
3632         /* fix up row size */
3633         gb_addr_config &= ~ROW_SIZE_MASK;
3634         switch (rdev->config.cik.mem_row_size_in_kb) {
3635         case 1:
3636         default:
3637                 gb_addr_config |= ROW_SIZE(0);
3638                 break;
3639         case 2:
3640                 gb_addr_config |= ROW_SIZE(1);
3641                 break;
3642         case 4:
3643                 gb_addr_config |= ROW_SIZE(2);
3644                 break;
3645         }
3646
3647         /* setup tiling info dword.  gb_addr_config is not adequate since it does
3648          * not have bank info, so create a custom tiling dword.
3649          * bits 3:0   num_pipes
3650          * bits 7:4   num_banks
3651          * bits 11:8  group_size
3652          * bits 15:12 row_size
3653          */
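        /* Illustrative example only: a 4-pipe part with bank code b,
         * pipe-interleave code g and row-size code r ends up with
         *   tile_config = (2 << 0) | (b << 4) | (g << 8) | (r << 12);
         * userspace queries this packed value from the kernel rather than
         * decoding GB_ADDR_CONFIG itself.
         */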
3654         rdev->config.cik.tile_config = 0;
3655         switch (rdev->config.cik.num_tile_pipes) {
3656         case 1:
3657                 rdev->config.cik.tile_config |= (0 << 0);
3658                 break;
3659         case 2:
3660                 rdev->config.cik.tile_config |= (1 << 0);
3661                 break;
3662         case 4:
3663                 rdev->config.cik.tile_config |= (2 << 0);
3664                 break;
3665         case 8:
3666         default:
3667                 /* XXX what about 12? */
3668                 rdev->config.cik.tile_config |= (3 << 0);
3669                 break;
3670         }
3671         rdev->config.cik.tile_config |=
3672                 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3673         rdev->config.cik.tile_config |=
3674                 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3675         rdev->config.cik.tile_config |=
3676                 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3677
3678         WREG32(GB_ADDR_CONFIG, gb_addr_config);
3679         WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3680         WREG32(DMIF_ADDR_CALC, gb_addr_config);
3681         WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3682         WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3683         WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3684         WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3685         WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3686
3687         cik_tiling_mode_table_init(rdev);
3688
3689         cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3690                      rdev->config.cik.max_sh_per_se,
3691                      rdev->config.cik.max_backends_per_se);
3692
3693         rdev->config.cik.active_cus = 0;
3694         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3695                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3696                         rdev->config.cik.active_cus +=
3697                                 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3698                 }
3699         }
3700
3701         /* set HW defaults for 3D engine */
3702         WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3703
3704         mutex_lock(&rdev->grbm_idx_mutex);
3705         /*
3706          * make sure that the following register writes are broadcast
3707          * to all the shaders
3708          */
3709         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3710         WREG32(SX_DEBUG_1, 0x20);
3711
3712         WREG32(TA_CNTL_AUX, 0x00010000);
3713
3714         tmp = RREG32(SPI_CONFIG_CNTL);
3715         tmp |= 0x03000000;
3716         WREG32(SPI_CONFIG_CNTL, tmp);
3717
3718         WREG32(SQ_CONFIG, 1);
3719
3720         WREG32(DB_DEBUG, 0);
3721
3722         tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3723         tmp |= 0x00000400;
3724         WREG32(DB_DEBUG2, tmp);
3725
3726         tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3727         tmp |= 0x00020200;
3728         WREG32(DB_DEBUG3, tmp);
3729
3730         tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3731         tmp |= 0x00018208;
3732         WREG32(CB_HW_CONTROL, tmp);
3733
3734         WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3735
3736         WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3737                                  SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3738                                  SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3739                                  SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3740
3741         WREG32(VGT_NUM_INSTANCES, 1);
3742
3743         WREG32(CP_PERFMON_CNTL, 0);
3744
3745         WREG32(SQ_CONFIG, 0);
3746
3747         WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3748                                           FORCE_EOV_MAX_REZ_CNT(255)));
3749
3750         WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3751                AUTO_INVLD_EN(ES_AND_GS_AUTO));
3752
3753         WREG32(VGT_GS_VERTEX_REUSE, 16);
3754         WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3755
3756         tmp = RREG32(HDP_MISC_CNTL);
3757         tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3758         WREG32(HDP_MISC_CNTL, tmp);
3759
3760         hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3761         WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3762
3763         WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3764         WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3765         mutex_unlock(&rdev->grbm_idx_mutex);
3766
3767         udelay(50);
3768 }
3769
3770 /*
3771  * GPU scratch registers helpers function.
3772  */
3773 /**
3774  * cik_scratch_init - setup driver info for CP scratch regs
3775  *
3776  * @rdev: radeon_device pointer
3777  *
3778  * Set up the number and offset of the CP scratch registers.
3779  * NOTE: use of CP scratch registers is a legacy interface and
3780  * is not used by default on newer asics (r6xx+).  On newer asics,
3781  * memory buffers are used for fences rather than scratch regs.
3782  */
3783 static void cik_scratch_init(struct radeon_device *rdev)
3784 {
3785         int i;
3786
3787         rdev->scratch.num_reg = 7;
3788         rdev->scratch.reg_base = SCRATCH_REG0;
3789         for (i = 0; i < rdev->scratch.num_reg; i++) {
3790                 rdev->scratch.free[i] = true;
3791                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3792         }
3793 }
3794
3795 /**
3796  * cik_ring_test - basic gfx ring test
3797  *
3798  * @rdev: radeon_device pointer
3799  * @ring: radeon_ring structure holding ring information
3800  *
3801  * Allocate a scratch register and write to it using the gfx ring (CIK).
3802  * Provides a basic gfx ring test to verify that the ring is working.
3803  * Used by cik_cp_gfx_resume().
3804  * Returns 0 on success, error on failure.
3805  */
3806 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3807 {
3808         uint32_t scratch;
3809         uint32_t tmp = 0;
3810         unsigned i;
3811         int r;
3812
3813         r = radeon_scratch_get(rdev, &scratch);
3814         if (r) {
3815                 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3816                 return r;
3817         }
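        /* seed the scratch register with a dummy value and have the CP
         * overwrite it from the ring; the test passes once the new value
         * becomes visible */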
3818         WREG32(scratch, 0xCAFEDEAD);
3819         r = radeon_ring_lock(rdev, ring, 3);
3820         if (r) {
3821                 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3822                 radeon_scratch_free(rdev, scratch);
3823                 return r;
3824         }
3825         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3826         radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3827         radeon_ring_write(ring, 0xDEADBEEF);
3828         radeon_ring_unlock_commit(rdev, ring, false);
3829
3830         for (i = 0; i < rdev->usec_timeout; i++) {
3831                 tmp = RREG32(scratch);
3832                 if (tmp == 0xDEADBEEF)
3833                         break;
3834                 DRM_UDELAY(1);
3835         }
3836         if (i < rdev->usec_timeout) {
3837                 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3838         } else {
3839                 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3840                           ring->idx, scratch, tmp);
3841                 r = -EINVAL;
3842         }
3843         radeon_scratch_free(rdev, scratch);
3844         return r;
3845 }
3846
3847 /**
3848  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3849  *
3850  * @rdev: radeon_device pointer
3851  * @ridx: radeon ring index
3852  *
3853  * Emits an hdp flush on the cp.
3854  */
3855 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3856                                        int ridx)
3857 {
3858         struct radeon_ring *ring = &rdev->ring[ridx];
3859         u32 ref_and_mask;
3860
3861         switch (ring->idx) {
3862         case CAYMAN_RING_TYPE_CP1_INDEX:
3863         case CAYMAN_RING_TYPE_CP2_INDEX:
3864         default:
3865                 switch (ring->me) {
3866                 case 0:
3867                         ref_and_mask = CP2 << ring->pipe;
3868                         break;
3869                 case 1:
3870                         ref_and_mask = CP6 << ring->pipe;
3871                         break;
3872                 default:
3873                         return;
3874                 }
3875                 break;
3876         case RADEON_RING_TYPE_GFX_INDEX:
3877                 ref_and_mask = CP0;
3878                 break;
3879         }
3880
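        /* ref_and_mask selects this CP's bit in the HDP flush request/done
         * registers; the WAIT_REG_MEM below writes the request bit and then
         * polls until the matching done bit is set */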
3881         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3882         radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3883                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
3884                                  WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3885         radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3886         radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3887         radeon_ring_write(ring, ref_and_mask);
3888         radeon_ring_write(ring, ref_and_mask);
3889         radeon_ring_write(ring, 0x20); /* poll interval */
3890 }
3891
3892 /**
3893  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3894  *
3895  * @rdev: radeon_device pointer
3896  * @fence: radeon fence object
3897  *
3898  * Emits a fence sequence number on the gfx ring and flushes
3899  * GPU caches.
3900  */
3901 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3902                              struct radeon_fence *fence)
3903 {
3904         struct radeon_ring *ring = &rdev->ring[fence->ring];
3905         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3906
3907         /* EVENT_WRITE_EOP - flush caches, send int */
3908         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3909         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3910                                  EOP_TC_ACTION_EN |
3911                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3912                                  EVENT_INDEX(5)));
3913         radeon_ring_write(ring, addr & 0xfffffffc);
3914         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3915         radeon_ring_write(ring, fence->seq);
3916         radeon_ring_write(ring, 0);
3917 }
3918
3919 /**
3920  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3921  *
3922  * @rdev: radeon_device pointer
3923  * @fence: radeon fence object
3924  *
3925  * Emits a fence sequence number on the compute ring and flushes
3926  * GPU caches.
3927  */
3928 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3929                                  struct radeon_fence *fence)
3930 {
3931         struct radeon_ring *ring = &rdev->ring[fence->ring];
3932         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3933
3934         /* RELEASE_MEM - flush caches, send int */
3935         radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3936         radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3937                                  EOP_TC_ACTION_EN |
3938                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3939                                  EVENT_INDEX(5)));
3940         radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3941         radeon_ring_write(ring, addr & 0xfffffffc);
3942         radeon_ring_write(ring, upper_32_bits(addr));
3943         radeon_ring_write(ring, fence->seq);
3944         radeon_ring_write(ring, 0);
3945 }
3946
3947 /**
3948  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3949  *
3950  * @rdev: radeon_device pointer
3951  * @ring: radeon ring buffer object
3952  * @semaphore: radeon semaphore object
3953  * @emit_wait: Is this a semaphore wait?
3954  *
3955  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3956  * from running ahead of semaphore waits.
3957  */
3958 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3959                              struct radeon_ring *ring,
3960                              struct radeon_semaphore *semaphore,
3961                              bool emit_wait)
3962 {
3963         uint64_t addr = semaphore->gpu_addr;
3964         unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3965
3966         radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3967         radeon_ring_write(ring, lower_32_bits(addr));
3968         radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3969
3970         if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3971                 /* Prevent the PFP from running ahead of the semaphore wait */
3972                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3973                 radeon_ring_write(ring, 0x0);
3974         }
3975
3976         return true;
3977 }
3978
3979 /**
3980  * cik_copy_cpdma - copy pages using the CP DMA engine
3981  *
3982  * @rdev: radeon_device pointer
3983  * @src_offset: src GPU address
3984  * @dst_offset: dst GPU address
3985  * @num_gpu_pages: number of GPU pages to xfer
3986  * @resv: reservation object to sync to
3987  *
3988  * Copy GPU pages using the CP DMA engine (CIK+).
3989  * Used by the radeon ttm implementation to move pages if
3990  * registered as the asic copy callback.
3991  */
3992 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3993                                     uint64_t src_offset, uint64_t dst_offset,
3994                                     unsigned num_gpu_pages,
3995                                     struct reservation_object *resv)
3996 {
3997         struct radeon_fence *fence;
3998         struct radeon_sync sync;
3999         int ring_index = rdev->asic->copy.blit_ring_index;
4000         struct radeon_ring *ring = &rdev->ring[ring_index];
4001         u32 size_in_bytes, cur_size_in_bytes, control;
4002         int i, num_loops;
4003         int r = 0;
4004
4005         radeon_sync_create(&sync);
4006
4007         size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4008         num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
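        /* a single PACKET3_DMA_DATA transfer is limited to 0x1fffff bytes,
         * so split the copy into num_loops chunks; each chunk takes 7 ring
         * dwords, plus a fixed allowance for sync and the fence */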
4009         r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4010         if (r) {
4011                 DRM_ERROR("radeon: moving bo (%d).\n", r);
4012                 radeon_sync_free(rdev, &sync, NULL);
4013                 return ERR_PTR(r);
4014         }
4015
4016         radeon_sync_resv(rdev, &sync, resv, false);
4017         radeon_sync_rings(rdev, &sync, ring->idx);
4018
4019         for (i = 0; i < num_loops; i++) {
4020                 cur_size_in_bytes = size_in_bytes;
4021                 if (cur_size_in_bytes > 0x1fffff)
4022                         cur_size_in_bytes = 0x1fffff;
4023                 size_in_bytes -= cur_size_in_bytes;
4024                 control = 0;
4025                 if (size_in_bytes == 0)
4026                         control |= PACKET3_DMA_DATA_CP_SYNC;
4027                 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4028                 radeon_ring_write(ring, control);
4029                 radeon_ring_write(ring, lower_32_bits(src_offset));
4030                 radeon_ring_write(ring, upper_32_bits(src_offset));
4031                 radeon_ring_write(ring, lower_32_bits(dst_offset));
4032                 radeon_ring_write(ring, upper_32_bits(dst_offset));
4033                 radeon_ring_write(ring, cur_size_in_bytes);
4034                 src_offset += cur_size_in_bytes;
4035                 dst_offset += cur_size_in_bytes;
4036         }
4037
4038         r = radeon_fence_emit(rdev, &fence, ring->idx);
4039         if (r) {
4040                 radeon_ring_unlock_undo(rdev, ring);
4041                 radeon_sync_free(rdev, &sync, NULL);
4042                 return ERR_PTR(r);
4043         }
4044
4045         radeon_ring_unlock_commit(rdev, ring, false);
4046         radeon_sync_free(rdev, &sync, fence);
4047
4048         return fence;
4049 }
4050
4051 /*
4052  * IB stuff
4053  */
4054 /**
4055  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4056  *
4057  * @rdev: radeon_device pointer
4058  * @ib: radeon indirect buffer object
4059  *
4060  * Emits a DE (drawing engine) or CE (constant engine) IB
4061  * on the gfx ring.  IBs are usually generated by userspace
4062  * acceleration drivers and submitted to the kernel for
4063  * scheduling on the ring.  This function schedules the IB
4064  * on the gfx ring for execution by the GPU.
4065  */
4066 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4067 {
4068         struct radeon_ring *ring = &rdev->ring[ib->ring];
4069         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4070         u32 header, control = INDIRECT_BUFFER_VALID;
4071
4072         if (ib->is_const_ib) {
4073                 /* set switch buffer packet before const IB */
4074                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4075                 radeon_ring_write(ring, 0);
4076
4077                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4078         } else {
4079                 u32 next_rptr;
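                /* record the read pointer value that will follow this IB
                 * (the write packet below plus the 4 dword INDIRECT_BUFFER
                 * packet) so the driver can tell how far the CP got */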
4080                 if (ring->rptr_save_reg) {
4081                         next_rptr = ring->wptr + 3 + 4;
4082                         radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4083                         radeon_ring_write(ring, ((ring->rptr_save_reg -
4084                                                   PACKET3_SET_UCONFIG_REG_START) >> 2));
4085                         radeon_ring_write(ring, next_rptr);
4086                 } else if (rdev->wb.enabled) {
4087                         next_rptr = ring->wptr + 5 + 4;
4088                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4089                         radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4090                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4091                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4092                         radeon_ring_write(ring, next_rptr);
4093                 }
4094
4095                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4096         }
4097
4098         control |= ib->length_dw | (vm_id << 24);
4099
4100         radeon_ring_write(ring, header);
4101         radeon_ring_write(ring,
4102 #ifdef __BIG_ENDIAN
4103                           (2 << 0) |
4104 #endif
4105                           (ib->gpu_addr & 0xFFFFFFFC));
4106         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4107         radeon_ring_write(ring, control);
4108 }
4109
4110 /**
4111  * cik_ib_test - basic gfx ring IB test
4112  *
4113  * @rdev: radeon_device pointer
4114  * @ring: radeon_ring structure holding ring information
4115  *
4116  * Allocate an IB and execute it on the gfx ring (CIK).
4117  * Provides a basic gfx ring test to verify that IBs are working.
4118  * Returns 0 on success, error on failure.
4119  */
4120 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4121 {
4122         struct radeon_ib ib;
4123         uint32_t scratch;
4124         uint32_t tmp = 0;
4125         unsigned i;
4126         int r;
4127
4128         r = radeon_scratch_get(rdev, &scratch);
4129         if (r) {
4130                 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4131                 return r;
4132         }
4133         WREG32(scratch, 0xCAFEDEAD);
4134         r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4135         if (r) {
4136                 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4137                 radeon_scratch_free(rdev, scratch);
4138                 return r;
4139         }
4140         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4141         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4142         ib.ptr[2] = 0xDEADBEEF;
4143         ib.length_dw = 3;
4144         r = radeon_ib_schedule(rdev, &ib, NULL, false);
4145         if (r) {
4146                 radeon_scratch_free(rdev, scratch);
4147                 radeon_ib_free(rdev, &ib);
4148                 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4149                 return r;
4150         }
4151         r = radeon_fence_wait(ib.fence, false);
4152         if (r) {
4153                 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4154                 radeon_scratch_free(rdev, scratch);
4155                 radeon_ib_free(rdev, &ib);
4156                 return r;
4157         }
4158         for (i = 0; i < rdev->usec_timeout; i++) {
4159                 tmp = RREG32(scratch);
4160                 if (tmp == 0xDEADBEEF)
4161                         break;
4162                 DRM_UDELAY(1);
4163         }
4164         if (i < rdev->usec_timeout) {
4165                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4166         } else {
4167                 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4168                           scratch, tmp);
4169                 r = -EINVAL;
4170         }
4171         radeon_scratch_free(rdev, scratch);
4172         radeon_ib_free(rdev, &ib);
4173         return r;
4174 }
4175
4176 /*
4177  * CP.
4178  * On CIK, gfx and compute now have independent command processors.
4179  *
4180  * GFX
4181  * Gfx consists of a single ring and can process both gfx jobs and
4182  * compute jobs.  The gfx CP consists of three microengines (ME):
4183  * PFP - Pre-Fetch Parser
4184  * ME - Micro Engine
4185  * CE - Constant Engine
4186  * The PFP and ME make up what is considered the Drawing Engine (DE).
4187  * The CE is an asynchronous engine used for updating buffer descriptors
4188  * used by the DE so that they can be loaded into cache in parallel
4189  * while the DE is processing state update packets.
4190  *
4191  * Compute
4192  * The compute CP consists of two microengines (ME):
4193  * MEC1 - Compute MicroEngine 1
4194  * MEC2 - Compute MicroEngine 2
4195  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4196  * The queues are exposed to userspace and are programmed directly
4197  * by the compute runtime.
4198  */
4199 /**
4200  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4201  *
4202  * @rdev: radeon_device pointer
4203  * @enable: enable or disable the MEs
4204  *
4205  * Halts or unhalts the gfx MEs.
4206  */
4207 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4208 {
4209         if (enable)
4210                 WREG32(CP_ME_CNTL, 0);
4211         else {
4212                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4213                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4214                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4215                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4216         }
4217         udelay(50);
4218 }
4219
4220 /**
4221  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4222  *
4223  * @rdev: radeon_device pointer
4224  *
4225  * Loads the gfx PFP, ME, and CE ucode.
4226  * Returns 0 for success, -EINVAL if the ucode is not available.
4227  */
4228 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4229 {
4230         int i;
4231
4232         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4233                 return -EINVAL;
4234
4235         cik_cp_gfx_enable(rdev, false);
4236
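        /* newer firmware images carry a header and little-endian payload;
         * legacy images are raw big-endian words */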
4237         if (rdev->new_fw) {
4238                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
4239                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4240                 const struct gfx_firmware_header_v1_0 *ce_hdr =
4241                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4242                 const struct gfx_firmware_header_v1_0 *me_hdr =
4243                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4244                 const __le32 *fw_data;
4245                 u32 fw_size;
4246
4247                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4248                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4249                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
4250
4251                 /* PFP */
4252                 fw_data = (const __le32 *)
4253                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4254                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4255                 WREG32(CP_PFP_UCODE_ADDR, 0);
4256                 for (i = 0; i < fw_size; i++)
4257                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4258                 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4259
4260                 /* CE */
4261                 fw_data = (const __le32 *)
4262                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4263                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4264                 WREG32(CP_CE_UCODE_ADDR, 0);
4265                 for (i = 0; i < fw_size; i++)
4266                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4267                 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4268
4269                 /* ME */
4270                 fw_data = (const __le32 *)
4271                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4272                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4273                 WREG32(CP_ME_RAM_WADDR, 0);
4274                 for (i = 0; i < fw_size; i++)
4275                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4276                 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4277                 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4278         } else {
4279                 const __be32 *fw_data;
4280
4281                 /* PFP */
4282                 fw_data = (const __be32 *)rdev->pfp_fw->data;
4283                 WREG32(CP_PFP_UCODE_ADDR, 0);
4284                 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4285                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4286                 WREG32(CP_PFP_UCODE_ADDR, 0);
4287
4288                 /* CE */
4289                 fw_data = (const __be32 *)rdev->ce_fw->data;
4290                 WREG32(CP_CE_UCODE_ADDR, 0);
4291                 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4292                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4293                 WREG32(CP_CE_UCODE_ADDR, 0);
4294
4295                 /* ME */
4296                 fw_data = (const __be32 *)rdev->me_fw->data;
4297                 WREG32(CP_ME_RAM_WADDR, 0);
4298                 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4299                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4300                 WREG32(CP_ME_RAM_WADDR, 0);
4301         }
4302
4303         return 0;
4304 }
4305
4306 /**
4307  * cik_cp_gfx_start - start the gfx ring
4308  *
4309  * @rdev: radeon_device pointer
4310  *
4311  * Enables the ring and loads the clear state context and other
4312  * packets required to init the ring.
4313  * Returns 0 for success, error for failure.
4314  */
4315 static int cik_cp_gfx_start(struct radeon_device *rdev)
4316 {
4317         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4318         int r, i;
4319
4320         /* init the CP */
4321         WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4322         WREG32(CP_ENDIAN_SWAP, 0);
4323         WREG32(CP_DEVICE_ID, 1);
4324
4325         cik_cp_gfx_enable(rdev, true);
4326
4327         r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4328         if (r) {
4329                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4330                 return r;
4331         }
4332
4333         /* init the CE partitions.  CE only used for gfx on CIK */
4334         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4335         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4336         radeon_ring_write(ring, 0x8000);
4337         radeon_ring_write(ring, 0x8000);
4338
4339         /* setup clear context state */
4340         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4341         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4342
4343         radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4344         radeon_ring_write(ring, 0x80000000);
4345         radeon_ring_write(ring, 0x80000000);
4346
4347         for (i = 0; i < cik_default_size; i++)
4348                 radeon_ring_write(ring, cik_default_state[i]);
4349
4350         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4351         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4352
4353         /* set clear context state */
4354         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4355         radeon_ring_write(ring, 0);
4356
4357         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4358         radeon_ring_write(ring, 0x00000316);
4359         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4360         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4361
4362         radeon_ring_unlock_commit(rdev, ring, false);
4363
4364         return 0;
4365 }
4366
4367 /**
4368  * cik_cp_gfx_fini - stop the gfx ring
4369  *
4370  * @rdev: radeon_device pointer
4371  *
4372  * Stop the gfx ring and tear down the driver ring
4373  * info.
4374  */
4375 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4376 {
4377         cik_cp_gfx_enable(rdev, false);
4378         radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4379 }
4380
4381 /**
4382  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Program the location and size of the gfx ring buffer
4387  * and test it to make sure it's working.
4388  * Returns 0 for success, error for failure.
4389  */
4390 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4391 {
4392         struct radeon_ring *ring;
4393         u32 tmp;
4394         u32 rb_bufsz;
4395         u64 rb_addr;
4396         int r;
4397
4398         WREG32(CP_SEM_WAIT_TIMER, 0x0);
4399         if (rdev->family != CHIP_HAWAII)
4400                 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4401
4402         /* Set the write pointer delay */
4403         WREG32(CP_RB_WPTR_DELAY, 0);
4404
4405         /* set the RB to use vmid 0 */
4406         WREG32(CP_RB_VMID, 0);
4407
4408         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4409
4410         /* ring 0 - compute and gfx */
4411         /* Set ring buffer size */
4412         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4413         rb_bufsz = order_base_2(ring->ring_size / 8);
4414         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
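        /* the ring size and the rptr-writeback block size are both given
         * as log2 of a count of qwords, hence the /8 and order_base_2 */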
4415 #ifdef __BIG_ENDIAN
4416         tmp |= BUF_SWAP_32BIT;
4417 #endif
4418         WREG32(CP_RB0_CNTL, tmp);
4419
4420         /* Initialize the ring buffer's read and write pointers */
4421         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4422         ring->wptr = 0;
4423         WREG32(CP_RB0_WPTR, ring->wptr);
4424
4425         /* set the wb address whether it's enabled or not */
4426         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4427         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4428
4429         /* scratch register shadowing is no longer supported */
4430         WREG32(SCRATCH_UMSK, 0);
4431
4432         if (!rdev->wb.enabled)
4433                 tmp |= RB_NO_UPDATE;
4434
4435         mdelay(1);
4436         WREG32(CP_RB0_CNTL, tmp);
4437
4438         rb_addr = ring->gpu_addr >> 8;
4439         WREG32(CP_RB0_BASE, rb_addr);
4440         WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4441
4442         /* start the ring */
4443         cik_cp_gfx_start(rdev);
4444         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4445         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4446         if (r) {
4447                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4448                 return r;
4449         }
4450
4451         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4452                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4453
4454         return 0;
4455 }
4456
4457 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4458                      struct radeon_ring *ring)
4459 {
4460         u32 rptr;
4461
4462         if (rdev->wb.enabled)
4463                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4464         else
4465                 rptr = RREG32(CP_RB0_RPTR);
4466
4467         return rptr;
4468 }
4469
4470 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4471                      struct radeon_ring *ring)
4472 {
4473         u32 wptr;
4474
4475         wptr = RREG32(CP_RB0_WPTR);
4476
4477         return wptr;
4478 }
4479
4480 void cik_gfx_set_wptr(struct radeon_device *rdev,
4481                       struct radeon_ring *ring)
4482 {
4483         WREG32(CP_RB0_WPTR, ring->wptr);
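        /* read back to make sure the write pointer update is posted to the GPU */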
4484         (void)RREG32(CP_RB0_WPTR);
4485 }
4486
4487 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4488                          struct radeon_ring *ring)
4489 {
4490         u32 rptr;
4491
4492         if (rdev->wb.enabled) {
4493                 rptr = rdev->wb.wb[ring->rptr_offs/4];
4494         } else {
4495                 mutex_lock(&rdev->srbm_mutex);
4496                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4497                 rptr = RREG32(CP_HQD_PQ_RPTR);
4498                 cik_srbm_select(rdev, 0, 0, 0, 0);
4499                 mutex_unlock(&rdev->srbm_mutex);
4500         }
4501
4502         return rptr;
4503 }
4504
4505 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4506                          struct radeon_ring *ring)
4507 {
4508         u32 wptr;
4509
4510         if (rdev->wb.enabled) {
4511                 /* XXX check if swapping is necessary on BE */
4512                 wptr = rdev->wb.wb[ring->wptr_offs/4];
4513         } else {
4514                 mutex_lock(&rdev->srbm_mutex);
4515                 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4516                 wptr = RREG32(CP_HQD_PQ_WPTR);
4517                 cik_srbm_select(rdev, 0, 0, 0, 0);
4518                 mutex_unlock(&rdev->srbm_mutex);
4519         }
4520
4521         return wptr;
4522 }
4523
4524 void cik_compute_set_wptr(struct radeon_device *rdev,
4525                           struct radeon_ring *ring)
4526 {
4527         /* XXX check if swapping is necessary on BE */
4528         rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
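        /* ring the queue's doorbell so the MEC picks up the new write pointer */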
4529         WDOORBELL32(ring->doorbell_index, ring->wptr);
4530 }
4531
4532 /**
4533  * cik_cp_compute_enable - enable/disable the compute CP MEs
4534  *
4535  * @rdev: radeon_device pointer
4536  * @enable: enable or disable the MEs
4537  *
4538  * Halts or unhalts the compute MEs.
4539  */
4540 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4541 {
4542         if (enable)
4543                 WREG32(CP_MEC_CNTL, 0);
4544         else {
4545                 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4546                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4547                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4548         }
4549         udelay(50);
4550 }
4551
4552 /**
4553  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4554  *
4555  * @rdev: radeon_device pointer
4556  *
4557  * Loads the compute MEC1&2 ucode.
4558  * Returns 0 for success, -EINVAL if the ucode is not available.
4559  */
4560 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4561 {
4562         int i;
4563
4564         if (!rdev->mec_fw)
4565                 return -EINVAL;
4566
4567         cik_cp_compute_enable(rdev, false);
4568
4569         if (rdev->new_fw) {
4570                 const struct gfx_firmware_header_v1_0 *mec_hdr =
4571                         (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4572                 const __le32 *fw_data;
4573                 u32 fw_size;
4574
4575                 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4576
4577                 /* MEC1 */
4578                 fw_data = (const __le32 *)
4579                         (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4580                 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4581                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4582                 for (i = 0; i < fw_size; i++)
4583                         WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4584                 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4585
4586                 /* MEC2 */
4587                 if (rdev->family == CHIP_KAVERI) {
4588                         const struct gfx_firmware_header_v1_0 *mec2_hdr =
4589                                 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4590
4591                         fw_data = (const __le32 *)
4592                                 (rdev->mec2_fw->data +
4593                                  le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4594                         fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4595                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4596                         for (i = 0; i < fw_size; i++)
4597                                 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4598                         WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4599                 }
4600         } else {
4601                 const __be32 *fw_data;
4602
4603                 /* MEC1 */
4604                 fw_data = (const __be32 *)rdev->mec_fw->data;
4605                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4606                 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4607                         WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4608                 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4609
4610                 if (rdev->family == CHIP_KAVERI) {
4611                         /* MEC2 */
4612                         fw_data = (const __be32 *)rdev->mec_fw->data;
4613                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4614                         for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4615                                 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4616                         WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4617                 }
4618         }
4619
4620         return 0;
4621 }
4622
4623 /**
4624  * cik_cp_compute_start - start the compute queues
4625  *
4626  * @rdev: radeon_device pointer
4627  *
4628  * Enable the compute queues.
4629  * Returns 0 for success, error for failure.
4630  */
4631 static int cik_cp_compute_start(struct radeon_device *rdev)
4632 {
4633         cik_cp_compute_enable(rdev, true);
4634
4635         return 0;
4636 }
4637
4638 /**
4639  * cik_cp_compute_fini - stop the compute queues
4640  *
4641  * @rdev: radeon_device pointer
4642  *
4643  * Stop the compute queues and tear down the driver queue
4644  * info.
4645  */
4646 static void cik_cp_compute_fini(struct radeon_device *rdev)
4647 {
4648         int i, idx, r;
4649
4650         cik_cp_compute_enable(rdev, false);
4651
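        /* release the MQD backing objects for both compute rings */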
4652         for (i = 0; i < 2; i++) {
4653                 if (i == 0)
4654                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4655                 else
4656                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4657
4658                 if (rdev->ring[idx].mqd_obj) {
4659                         r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4660                         if (unlikely(r != 0))
4661                                 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4662
4663                         radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4664                         radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4665
4666                         radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4667                         rdev->ring[idx].mqd_obj = NULL;
4668                 }
4669         }
4670 }
4671
4672 static void cik_mec_fini(struct radeon_device *rdev)
4673 {
4674         int r;
4675
4676         if (rdev->mec.hpd_eop_obj) {
4677                 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4678                 if (unlikely(r != 0))
4679                         dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4680                 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4681                 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4682
4683                 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4684                 rdev->mec.hpd_eop_obj = NULL;
4685         }
4686 }
4687
4688 #define MEC_HPD_SIZE 2048
4689
4690 static int cik_mec_init(struct radeon_device *rdev)
4691 {
4692         int r;
4693         u32 *hpd;
4694
4695         /*
4696          * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4697          * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4698          * Nonetheless, we assign only 1 pipe because all other pipes will
4699          * be handled by KFD
4700          */
4701         rdev->mec.num_mec = 1;
4702         rdev->mec.num_pipe = 1;
4703         rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
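             /*
              * With num_mec = 1 and num_pipe = 1 this yields 8 queues and a
              * single HPD EOP buffer of num_mec * num_pipe * MEC_HPD_SIZE * 2
              * = 4096 bytes, i.e. one page on 4 KiB-page systems, which is
              * what the allocation below requests.
              */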
4704
4705         if (rdev->mec.hpd_eop_obj == NULL) {
4706                 r = radeon_bo_create(rdev,
4707                                      rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4708                                      PAGE_SIZE, true,
4709                                      RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4710                                      &rdev->mec.hpd_eop_obj);
4711                 if (r) {
4712                         dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
4713                         return r;
4714                 }
4715         }
4716
4717         r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4718         if (unlikely(r != 0)) {
4719                 cik_mec_fini(rdev);
4720                 return r;
4721         }
4722         r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4723                           &rdev->mec.hpd_eop_gpu_addr);
4724         if (r) {
4725                 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
4726                 cik_mec_fini(rdev);
4727                 return r;
4728         }
4729         r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4730         if (r) {
4731                 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
4732                 cik_mec_fini(rdev);
4733                 return r;
4734         }
4735
4736         /* clear memory.  Not sure if this is required or not */
4737         memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4738
4739         radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4740         radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4741
4742         return 0;
4743 }
4744
4745 struct hqd_registers
4746 {
4747         u32 cp_mqd_base_addr;
4748         u32 cp_mqd_base_addr_hi;
4749         u32 cp_hqd_active;
4750         u32 cp_hqd_vmid;
4751         u32 cp_hqd_persistent_state;
4752         u32 cp_hqd_pipe_priority;
4753         u32 cp_hqd_queue_priority;
4754         u32 cp_hqd_quantum;
4755         u32 cp_hqd_pq_base;
4756         u32 cp_hqd_pq_base_hi;
4757         u32 cp_hqd_pq_rptr;
4758         u32 cp_hqd_pq_rptr_report_addr;
4759         u32 cp_hqd_pq_rptr_report_addr_hi;
4760         u32 cp_hqd_pq_wptr_poll_addr;
4761         u32 cp_hqd_pq_wptr_poll_addr_hi;
4762         u32 cp_hqd_pq_doorbell_control;
4763         u32 cp_hqd_pq_wptr;
4764         u32 cp_hqd_pq_control;
4765         u32 cp_hqd_ib_base_addr;
4766         u32 cp_hqd_ib_base_addr_hi;
4767         u32 cp_hqd_ib_rptr;
4768         u32 cp_hqd_ib_control;
4769         u32 cp_hqd_iq_timer;
4770         u32 cp_hqd_iq_rptr;
4771         u32 cp_hqd_dequeue_request;
4772         u32 cp_hqd_dma_offload;
4773         u32 cp_hqd_sema_cmd;
4774         u32 cp_hqd_msg_type;
4775         u32 cp_hqd_atomic0_preop_lo;
4776         u32 cp_hqd_atomic0_preop_hi;
4777         u32 cp_hqd_atomic1_preop_lo;
4778         u32 cp_hqd_atomic1_preop_hi;
4779         u32 cp_hqd_hq_scheduler0;
4780         u32 cp_hqd_hq_scheduler1;
4781         u32 cp_mqd_control;
4782 };
4783
4784 struct bonaire_mqd
4785 {
4786         u32 header;
4787         u32 dispatch_initiator;
4788         u32 dimensions[3];
4789         u32 start_idx[3];
4790         u32 num_threads[3];
4791         u32 pipeline_stat_enable;
4792         u32 perf_counter_enable;
4793         u32 pgm[2];
4794         u32 tba[2];
4795         u32 tma[2];
4796         u32 pgm_rsrc[2];
4797         u32 vmid;
4798         u32 resource_limits;
4799         u32 static_thread_mgmt01[2];
4800         u32 tmp_ring_size;
4801         u32 static_thread_mgmt23[2];
4802         u32 restart[3];
4803         u32 thread_trace_enable;
4804         u32 reserved1;
4805         u32 user_data[16];
4806         u32 vgtcs_invoke_count[2];
4807         struct hqd_registers queue_state;
4808         u32 dequeue_cntr;
4809         u32 interrupt_queue[64];
4810 };
4811
4812 /**
4813  * cik_cp_compute_resume - setup the compute queue registers
4814  *
4815  * @rdev: radeon_device pointer
4816  *
4817  * Program the compute queues and test them to make sure they
4818  * are working.
4819  * Returns 0 for success, error for failure.
4820  */
4821 static int cik_cp_compute_resume(struct radeon_device *rdev)
4822 {
4823         int r, i, j, idx;
4824         u32 tmp;
4825         bool use_doorbell = true;
4826         u64 hqd_gpu_addr;
4827         u64 mqd_gpu_addr;
4828         u64 eop_gpu_addr;
4829         u64 wb_gpu_addr;
4830         u32 *buf;
4831         struct bonaire_mqd *mqd;
4832
4833         r = cik_cp_compute_start(rdev);
4834         if (r)
4835                 return r;
4836
4837         /* fix up chicken bits */
4838         tmp = RREG32(CP_CPF_DEBUG);
4839         tmp |= (1 << 23);
4840         WREG32(CP_CPF_DEBUG, tmp);
4841
4842         /* init the pipes */
4843         mutex_lock(&rdev->srbm_mutex);
4844
4845         eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4846
4847         cik_srbm_select(rdev, 0, 0, 0, 0);
4848
4849         /* write the EOP addr */
4850         WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4851         WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4852
4853         /* set the VMID assigned */
4854         WREG32(CP_HPD_EOP_VMID, 0);
4855
4856         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4857         tmp = RREG32(CP_HPD_EOP_CONTROL);
4858         tmp &= ~EOP_SIZE_MASK;
4859         tmp |= order_base_2(MEC_HPD_SIZE / 8);
4860         WREG32(CP_HPD_EOP_CONTROL, tmp);
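             /*
              * Worked example: MEC_HPD_SIZE is 2048 bytes (512 dwords), so
              * order_base_2(2048 / 8) = 8 is programmed and the hardware
              * decodes it as 2^(8+1) = 512 dwords, matching the buffer size.
              */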
4861
4862         mutex_unlock(&rdev->srbm_mutex);
4863
4864         /* init the queues.  Just two for now. */
4865         for (i = 0; i < 2; i++) {
4866                 if (i == 0)
4867                         idx = CAYMAN_RING_TYPE_CP1_INDEX;
4868                 else
4869                         idx = CAYMAN_RING_TYPE_CP2_INDEX;
4870
4871                 if (rdev->ring[idx].mqd_obj == NULL) {
4872                         r = radeon_bo_create(rdev,
4873                                              sizeof(struct bonaire_mqd),
4874                                              PAGE_SIZE, true,
4875                                              RADEON_GEM_DOMAIN_GTT, 0, NULL,
4876                                              NULL, &rdev->ring[idx].mqd_obj);
4877                         if (r) {
4878                                 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4879                                 return r;
4880                         }
4881                 }
4882
4883                 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4884                 if (unlikely(r != 0)) {
4885                         cik_cp_compute_fini(rdev);
4886                         return r;
4887                 }
4888                 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4889                                   &mqd_gpu_addr);
4890                 if (r) {
4891                         dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4892                         cik_cp_compute_fini(rdev);
4893                         return r;
4894                 }
4895                 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4896                 if (r) {
4897                         dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4898                         cik_cp_compute_fini(rdev);
4899                         return r;
4900                 }
4901
4902                 /* init the mqd struct */
4903                 memset(buf, 0, sizeof(struct bonaire_mqd));
4904
4905                 mqd = (struct bonaire_mqd *)buf;
4906                 mqd->header = 0xC0310800;
4907                 mqd->static_thread_mgmt01[0] = 0xffffffff;
4908                 mqd->static_thread_mgmt01[1] = 0xffffffff;
4909                 mqd->static_thread_mgmt23[0] = 0xffffffff;
4910                 mqd->static_thread_mgmt23[1] = 0xffffffff;
4911
4912                 mutex_lock(&rdev->srbm_mutex);
4913                 cik_srbm_select(rdev, rdev->ring[idx].me,
4914                                 rdev->ring[idx].pipe,
4915                                 rdev->ring[idx].queue, 0);
4916
4917                 /* disable wptr polling */
4918                 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4919                 tmp &= ~WPTR_POLL_EN;
4920                 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4921
4922                 /* enable doorbell? */
4923                 mqd->queue_state.cp_hqd_pq_doorbell_control =
4924                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4925                 if (use_doorbell)
4926                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4927                 else
4928                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4929                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4930                        mqd->queue_state.cp_hqd_pq_doorbell_control);
4931
4932                 /* disable the queue if it's active */
4933                 mqd->queue_state.cp_hqd_dequeue_request = 0;
4934                 mqd->queue_state.cp_hqd_pq_rptr = 0;
4935                 mqd->queue_state.cp_hqd_pq_wptr = 0;
4936                 if (RREG32(CP_HQD_ACTIVE) & 1) {
4937                         WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4938                         for (j = 0; j < rdev->usec_timeout; j++) {
4939                                 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4940                                         break;
4941                                 udelay(1);
4942                         }
4943                         WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4944                         WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4945                         WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4946                 }
4947
4948                 /* set the pointer to the MQD */
4949                 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4950                 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4951                 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4952                 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
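                     /*
                      * The 64-bit MQD address is split across two 32-bit
                      * registers: the low dword with bits 1:0 masked off
                      * (the MQD is at least 4-byte aligned) and the high
                      * dword, e.g. 0x1_2345_6780 becomes base 0x23456780
                      * and base_hi 0x00000001.
                      */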
4953                 /* set MQD vmid to 0 */
4954                 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4955                 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4956                 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4957
4958                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4959                 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4960                 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4961                 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4962                 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4963                 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4964
4965                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4966                 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4967                 mqd->queue_state.cp_hqd_pq_control &=
4968                         ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4969
4970                 mqd->queue_state.cp_hqd_pq_control |=
4971                         order_base_2(rdev->ring[idx].ring_size / 8);
4972                 mqd->queue_state.cp_hqd_pq_control |=
4973                         (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4974 #ifdef __BIG_ENDIAN
4975                 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4976 #endif
4977                 mqd->queue_state.cp_hqd_pq_control &=
4978                         ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4979                 mqd->queue_state.cp_hqd_pq_control |=
4980                         PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4981                 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4982
4983                 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4984                 if (i == 0)
4985                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4986                 else
4987                         wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4988                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4989                 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4990                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4991                 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4992                        mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4993
4994                 /* set the wb address whether it's enabled or not */
4995                 if (i == 0)
4996                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4997                 else
4998                         wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4999                 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5000                 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5001                         upper_32_bits(wb_gpu_addr) & 0xffff;
5002                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5003                        mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5004                 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5005                        mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5006
5007                 /* enable the doorbell if requested */
5008                 if (use_doorbell) {
5009                         mqd->queue_state.cp_hqd_pq_doorbell_control =
5010                                 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5011                         mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5012                         mqd->queue_state.cp_hqd_pq_doorbell_control |=
5013                                 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5014                         mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5015                         mqd->queue_state.cp_hqd_pq_doorbell_control &=
5016                                 ~(DOORBELL_SOURCE | DOORBELL_HIT);
5017
5018                 } else {
5019                         mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5020                 }
5021                 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5022                        mqd->queue_state.cp_hqd_pq_doorbell_control);
5023
5024                 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5025                 rdev->ring[idx].wptr = 0;
5026                 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5027                 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5028                 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5029
5030                 /* set the vmid for the queue */
5031                 mqd->queue_state.cp_hqd_vmid = 0;
5032                 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5033
5034                 /* activate the queue */
5035                 mqd->queue_state.cp_hqd_active = 1;
5036                 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5037
5038                 cik_srbm_select(rdev, 0, 0, 0, 0);
5039                 mutex_unlock(&rdev->srbm_mutex);
5040
5041                 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5042                 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5043
5044                 rdev->ring[idx].ready = true;
5045                 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5046                 if (r)
5047                         rdev->ring[idx].ready = false;
5048         }
5049
5050         return 0;
5051 }
5052
5053 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5054 {
5055         cik_cp_gfx_enable(rdev, enable);
5056         cik_cp_compute_enable(rdev, enable);
5057 }
5058
5059 static int cik_cp_load_microcode(struct radeon_device *rdev)
5060 {
5061         int r;
5062
5063         r = cik_cp_gfx_load_microcode(rdev);
5064         if (r)
5065                 return r;
5066         r = cik_cp_compute_load_microcode(rdev);
5067         if (r)
5068                 return r;
5069
5070         return 0;
5071 }
5072
5073 static void cik_cp_fini(struct radeon_device *rdev)
5074 {
5075         cik_cp_gfx_fini(rdev);
5076         cik_cp_compute_fini(rdev);
5077 }
5078
5079 static int cik_cp_resume(struct radeon_device *rdev)
5080 {
5081         int r;
5082
5083         cik_enable_gui_idle_interrupt(rdev, false);
5084
5085         r = cik_cp_load_microcode(rdev);
5086         if (r)
5087                 return r;
5088
5089         r = cik_cp_gfx_resume(rdev);
5090         if (r)
5091                 return r;
5092         r = cik_cp_compute_resume(rdev);
5093         if (r)
5094                 return r;
5095
5096         cik_enable_gui_idle_interrupt(rdev, true);
5097
5098         return 0;
5099 }
5100
5101 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5102 {
5103         dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5104                 RREG32(GRBM_STATUS));
5105         dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5106                 RREG32(GRBM_STATUS2));
5107         dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5108                 RREG32(GRBM_STATUS_SE0));
5109         dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5110                 RREG32(GRBM_STATUS_SE1));
5111         dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5112                 RREG32(GRBM_STATUS_SE2));
5113         dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5114                 RREG32(GRBM_STATUS_SE3));
5115         dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5116                 RREG32(SRBM_STATUS));
5117         dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5118                 RREG32(SRBM_STATUS2));
5119         dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5120                 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5121         dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5122                  RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5123         dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5124         dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5125                  RREG32(CP_STALLED_STAT1));
5126         dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5127                  RREG32(CP_STALLED_STAT2));
5128         dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5129                  RREG32(CP_STALLED_STAT3));
5130         dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5131                  RREG32(CP_CPF_BUSY_STAT));
5132         dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5133                  RREG32(CP_CPF_STALLED_STAT1));
5134         dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5135         dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5136         dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5137                  RREG32(CP_CPC_STALLED_STAT1));
5138         dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5139 }
5140
5141 /**
5142  * cik_gpu_check_soft_reset - check which blocks are busy
5143  *
5144  * @rdev: radeon_device pointer
5145  *
5146  * Check which blocks are busy and return the relevant reset
5147  * mask to be used by cik_gpu_soft_reset().
5148  * Returns a mask of the blocks to be reset.
5149  */
5150 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5151 {
5152         u32 reset_mask = 0;
5153         u32 tmp;
5154
5155         /* GRBM_STATUS */
5156         tmp = RREG32(GRBM_STATUS);
5157         if (tmp & (PA_BUSY | SC_BUSY |
5158                    BCI_BUSY | SX_BUSY |
5159                    TA_BUSY | VGT_BUSY |
5160                    DB_BUSY | CB_BUSY |
5161                    GDS_BUSY | SPI_BUSY |
5162                    IA_BUSY | IA_BUSY_NO_DMA))
5163                 reset_mask |= RADEON_RESET_GFX;
5164
5165         if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5166                 reset_mask |= RADEON_RESET_CP;
5167
5168         /* GRBM_STATUS2 */
5169         tmp = RREG32(GRBM_STATUS2);
5170         if (tmp & RLC_BUSY)
5171                 reset_mask |= RADEON_RESET_RLC;
5172
5173         /* SDMA0_STATUS_REG */
5174         tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5175         if (!(tmp & SDMA_IDLE))
5176                 reset_mask |= RADEON_RESET_DMA;
5177
5178         /* SDMA1_STATUS_REG */
5179         tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5180         if (!(tmp & SDMA_IDLE))
5181                 reset_mask |= RADEON_RESET_DMA1;
5182
5183         /* SRBM_STATUS2 */
5184         tmp = RREG32(SRBM_STATUS2);
5185         if (tmp & SDMA_BUSY)
5186                 reset_mask |= RADEON_RESET_DMA;
5187
5188         if (tmp & SDMA1_BUSY)
5189                 reset_mask |= RADEON_RESET_DMA1;
5190
5191         /* SRBM_STATUS */
5192         tmp = RREG32(SRBM_STATUS);
5193
5194         if (tmp & IH_BUSY)
5195                 reset_mask |= RADEON_RESET_IH;
5196
5197         if (tmp & SEM_BUSY)
5198                 reset_mask |= RADEON_RESET_SEM;
5199
5200         if (tmp & GRBM_RQ_PENDING)
5201                 reset_mask |= RADEON_RESET_GRBM;
5202
5203         if (tmp & VMC_BUSY)
5204                 reset_mask |= RADEON_RESET_VMC;
5205
5206         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5207                    MCC_BUSY | MCD_BUSY))
5208                 reset_mask |= RADEON_RESET_MC;
5209
5210         if (evergreen_is_display_hung(rdev))
5211                 reset_mask |= RADEON_RESET_DISPLAY;
5212
5213         /* Skip MC reset as it's most likely not hung, just busy */
5214         if (reset_mask & RADEON_RESET_MC) {
5215                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5216                 reset_mask &= ~RADEON_RESET_MC;
5217         }
5218
5219         return reset_mask;
5220 }
5221
5222 /**
5223  * cik_gpu_soft_reset - soft reset GPU
5224  *
5225  * @rdev: radeon_device pointer
5226  * @reset_mask: mask of which blocks to reset
5227  *
5228  * Soft reset the blocks specified in @reset_mask.
5229  */
5230 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5231 {
5232         struct evergreen_mc_save save;
5233         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5234         u32 tmp;
5235
5236         if (reset_mask == 0)
5237                 return;
5238
5239         dev_info(rdev->dev, "GPU soft reset: 0x%08X\n", reset_mask);
5240
5241         cik_print_gpu_status_regs(rdev);
5242         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5243                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5244         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5245                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5246
5247         /* disable CG/PG */
5248         cik_fini_pg(rdev);
5249         cik_fini_cg(rdev);
5250
5251         /* stop the rlc */
5252         cik_rlc_stop(rdev);
5253
5254         /* Disable GFX parsing/prefetching */
5255         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5256
5257         /* Disable MEC parsing/prefetching */
5258         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5259
5260         if (reset_mask & RADEON_RESET_DMA) {
5261                 /* sdma0 */
5262                 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5263                 tmp |= SDMA_HALT;
5264                 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5265         }
5266         if (reset_mask & RADEON_RESET_DMA1) {
5267                 /* sdma1 */
5268                 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5269                 tmp |= SDMA_HALT;
5270                 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5271         }
5272
5273         evergreen_mc_stop(rdev, &save);
5274         if (evergreen_mc_wait_for_idle(rdev)) {
5275                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5276         }
5277
5278         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5279                 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5280
5281         if (reset_mask & RADEON_RESET_CP) {
5282                 grbm_soft_reset |= SOFT_RESET_CP;
5283
5284                 srbm_soft_reset |= SOFT_RESET_GRBM;
5285         }
5286
5287         if (reset_mask & RADEON_RESET_DMA)
5288                 srbm_soft_reset |= SOFT_RESET_SDMA;
5289
5290         if (reset_mask & RADEON_RESET_DMA1)
5291                 srbm_soft_reset |= SOFT_RESET_SDMA1;
5292
5293         if (reset_mask & RADEON_RESET_DISPLAY)
5294                 srbm_soft_reset |= SOFT_RESET_DC;
5295
5296         if (reset_mask & RADEON_RESET_RLC)
5297                 grbm_soft_reset |= SOFT_RESET_RLC;
5298
5299         if (reset_mask & RADEON_RESET_SEM)
5300                 srbm_soft_reset |= SOFT_RESET_SEM;
5301
5302         if (reset_mask & RADEON_RESET_IH)
5303                 srbm_soft_reset |= SOFT_RESET_IH;
5304
5305         if (reset_mask & RADEON_RESET_GRBM)
5306                 srbm_soft_reset |= SOFT_RESET_GRBM;
5307
5308         if (reset_mask & RADEON_RESET_VMC)
5309                 srbm_soft_reset |= SOFT_RESET_VMC;
5310
5311         if (!(rdev->flags & RADEON_IS_IGP)) {
5312                 if (reset_mask & RADEON_RESET_MC)
5313                         srbm_soft_reset |= SOFT_RESET_MC;
5314         }
5315
5316         if (grbm_soft_reset) {
5317                 tmp = RREG32(GRBM_SOFT_RESET);
5318                 tmp |= grbm_soft_reset;
5319                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5320                 WREG32(GRBM_SOFT_RESET, tmp);
5321                 tmp = RREG32(GRBM_SOFT_RESET);
5322
5323                 udelay(50);
5324
5325                 tmp &= ~grbm_soft_reset;
5326                 WREG32(GRBM_SOFT_RESET, tmp);
5327                 tmp = RREG32(GRBM_SOFT_RESET);
5328         }
5329
5330         if (srbm_soft_reset) {
5331                 tmp = RREG32(SRBM_SOFT_RESET);
5332                 tmp |= srbm_soft_reset;
5333                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5334                 WREG32(SRBM_SOFT_RESET, tmp);
5335                 tmp = RREG32(SRBM_SOFT_RESET);
5336
5337                 udelay(50);
5338
5339                 tmp &= ~srbm_soft_reset;
5340                 WREG32(SRBM_SOFT_RESET, tmp);
5341                 tmp = RREG32(SRBM_SOFT_RESET);
5342         }
5343
5344         /* Wait a little for things to settle down */
5345         udelay(50);
5346
5347         evergreen_mc_resume(rdev, &save);
5348         udelay(50);
5349
5350         cik_print_gpu_status_regs(rdev);
5351 }
5352
5353 struct kv_reset_save_regs {
5354         u32 gmcon_reng_execute;
5355         u32 gmcon_misc;
5356         u32 gmcon_misc3;
5357 };
5358
5359 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5360                                    struct kv_reset_save_regs *save)
5361 {
5362         save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5363         save->gmcon_misc = RREG32(GMCON_MISC);
5364         save->gmcon_misc3 = RREG32(GMCON_MISC3);
5365
5366         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5367         WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5368                                                 STCTRL_STUTTER_EN));
5369 }
5370
5371 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5372                                       struct kv_reset_save_regs *save)
5373 {
5374         int i;
5375
5376         WREG32(GMCON_PGFSM_WRITE, 0);
5377         WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5378
5379         for (i = 0; i < 5; i++)
5380                 WREG32(GMCON_PGFSM_WRITE, 0);
5381
5382         WREG32(GMCON_PGFSM_WRITE, 0);
5383         WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5384
5385         for (i = 0; i < 5; i++)
5386                 WREG32(GMCON_PGFSM_WRITE, 0);
5387
5388         WREG32(GMCON_PGFSM_WRITE, 0x210000);
5389         WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5390
5391         for (i = 0; i < 5; i++)
5392                 WREG32(GMCON_PGFSM_WRITE, 0);
5393
5394         WREG32(GMCON_PGFSM_WRITE, 0x21003);
5395         WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5396
5397         for (i = 0; i < 5; i++)
5398                 WREG32(GMCON_PGFSM_WRITE, 0);
5399
5400         WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5401         WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5402
5403         for (i = 0; i < 5; i++)
5404                 WREG32(GMCON_PGFSM_WRITE, 0);
5405
5406         WREG32(GMCON_PGFSM_WRITE, 0);
5407         WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5408
5409         for (i = 0; i < 5; i++)
5410                 WREG32(GMCON_PGFSM_WRITE, 0);
5411
5412         WREG32(GMCON_PGFSM_WRITE, 0x420000);
5413         WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5414
5415         for (i = 0; i < 5; i++)
5416                 WREG32(GMCON_PGFSM_WRITE, 0);
5417
5418         WREG32(GMCON_PGFSM_WRITE, 0x120202);
5419         WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5420
5421         for (i = 0; i < 5; i++)
5422                 WREG32(GMCON_PGFSM_WRITE, 0);
5423
5424         WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5425         WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5426
5427         for (i = 0; i < 5; i++)
5428                 WREG32(GMCON_PGFSM_WRITE, 0);
5429
5430         WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5431         WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5432
5433         for (i = 0; i < 5; i++)
5434                 WREG32(GMCON_PGFSM_WRITE, 0);
5435
5436         WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5437         WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5438
5439         WREG32(GMCON_MISC3, save->gmcon_misc3);
5440         WREG32(GMCON_MISC, save->gmcon_misc);
5441         WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5442 }
5443
5444 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5445 {
5446         struct evergreen_mc_save save;
5447         struct kv_reset_save_regs kv_save = { 0 };
5448         u32 tmp, i;
5449
5450         dev_info(rdev->dev, "GPU pci config reset\n");
5451
5452         /* disable dpm? */
5453
5454         /* disable cg/pg */
5455         cik_fini_pg(rdev);
5456         cik_fini_cg(rdev);
5457
5458         /* Disable GFX parsing/prefetching */
5459         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5460
5461         /* Disable MEC parsing/prefetching */
5462         WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5463
5464         /* sdma0 */
5465         tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5466         tmp |= SDMA_HALT;
5467         WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5468         /* sdma1 */
5469         tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5470         tmp |= SDMA_HALT;
5471         WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5472         /* XXX other engines? */
5473
5474         /* halt the rlc, disable cp internal ints */
5475         cik_rlc_stop(rdev);
5476
5477         udelay(50);
5478
5479         /* disable mem access */
5480         evergreen_mc_stop(rdev, &save);
5481         if (evergreen_mc_wait_for_idle(rdev)) {
5482                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5483         }
5484
5485         if (rdev->flags & RADEON_IS_IGP)
5486                 kv_save_regs_for_reset(rdev, &kv_save);
5487
5488         /* disable BM */
5489         pci_clear_master(rdev->pdev);
5490         /* reset */
5491         radeon_pci_config_reset(rdev);
5492
5493         udelay(100);
5494
5495         /* wait for asic to come out of reset */
5496         for (i = 0; i < rdev->usec_timeout; i++) {
5497                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5498                         break;
5499                 udelay(1);
5500         }
5501
5502         /* does asic init need to be run first??? */
5503         if (rdev->flags & RADEON_IS_IGP)
5504                 kv_restore_regs_for_reset(rdev, &kv_save);
5505 }
5506
5507 /**
5508  * cik_asic_reset - soft reset GPU
5509  *
5510  * @rdev: radeon_device pointer
5511  *
5512  * Look up which blocks are hung and attempt
5513  * to reset them.
5514  * Returns 0 for success.
5515  */
5516 int cik_asic_reset(struct radeon_device *rdev)
5517 {
5518         u32 reset_mask;
5519
5520         reset_mask = cik_gpu_check_soft_reset(rdev);
5521
5522         if (reset_mask)
5523                 r600_set_bios_scratch_engine_hung(rdev, true);
5524
5525         /* try soft reset */
5526         cik_gpu_soft_reset(rdev, reset_mask);
5527
5528         reset_mask = cik_gpu_check_soft_reset(rdev);
5529
5530         /* try pci config reset */
5531         if (reset_mask && radeon_hard_reset)
5532                 cik_gpu_pci_config_reset(rdev);
5533
5534         reset_mask = cik_gpu_check_soft_reset(rdev);
5535
5536         if (!reset_mask)
5537                 r600_set_bios_scratch_engine_hung(rdev, false);
5538
5539         return 0;
5540 }
5541
5542 /**
5543  * cik_gfx_is_lockup - check if the 3D engine is locked up
5544  *
5545  * @rdev: radeon_device pointer
5546  * @ring: radeon_ring structure holding ring information
5547  *
5548  * Check if the 3D engine is locked up (CIK).
5549  * Returns true if the engine is locked, false if not.
5550  */
5551 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5552 {
5553         u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5554
5555         if (!(reset_mask & (RADEON_RESET_GFX |
5556                             RADEON_RESET_COMPUTE |
5557                             RADEON_RESET_CP))) {
5558                 radeon_ring_lockup_update(rdev, ring);
5559                 return false;
5560         }
5561         return radeon_ring_test_lockup(rdev, ring);
5562 }
5563
5564 /* MC */
5565 /**
5566  * cik_mc_program - program the GPU memory controller
5567  *
5568  * @rdev: radeon_device pointer
5569  *
5570  * Set the location of vram, gart, and AGP in the GPU's
5571  * physical address space (CIK).
5572  */
5573 static void cik_mc_program(struct radeon_device *rdev)
5574 {
5575         struct evergreen_mc_save save;
5576         u32 tmp;
5577         int i, j;
5578
5579         /* Initialize HDP */
5580         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5581                 WREG32((0x2c14 + j), 0x00000000);
5582                 WREG32((0x2c18 + j), 0x00000000);
5583                 WREG32((0x2c1c + j), 0x00000000);
5584                 WREG32((0x2c20 + j), 0x00000000);
5585                 WREG32((0x2c24 + j), 0x00000000);
5586         }
5587         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5588
5589         evergreen_mc_stop(rdev, &save);
5590         if (radeon_mc_wait_for_idle(rdev)) {
5591                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5592         }
5593         /* Lockout access through VGA aperture */
5594         WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5595         /* Update configuration */
5596         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5597                rdev->mc.vram_start >> 12);
5598         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5599                rdev->mc.vram_end >> 12);
5600         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5601                rdev->vram_scratch.gpu_addr >> 12);
5602         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5603         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5604         WREG32(MC_VM_FB_LOCATION, tmp);
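             /*
              * MC_VM_FB_LOCATION packs the FB range in 16 MB units: bits
              * 31:16 hold vram_end >> 24 and bits 15:0 hold vram_start >> 24.
              * For example, a 256 MB FB starting at 0 (vram_end 0x0FFFFFFF)
              * encodes as 0x000F0000.
              */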
5605         /* XXX double check these! */
5606         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5607         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5608         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5609         WREG32(MC_VM_AGP_BASE, 0);
5610         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5611         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5612         if (radeon_mc_wait_for_idle(rdev)) {
5613                 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5614         }
5615         evergreen_mc_resume(rdev, &save);
5616         /* we need to own VRAM, so turn off the VGA renderer here
5617          * to stop it overwriting our objects */
5618         rv515_vga_render_disable(rdev);
5619 }
5620
5621 /**
5622  * cik_mc_init - initialize the memory controller driver params
5623  *
5624  * @rdev: radeon_device pointer
5625  *
5626  * Look up the amount of vram, vram width, and decide how to place
5627  * vram and gart within the GPU's physical address space (CIK).
5628  * Returns 0 for success.
5629  */
5630 static int cik_mc_init(struct radeon_device *rdev)
5631 {
5632         u32 tmp;
5633         int chansize, numchan;
5634
5635         /* Get VRAM information */
5636         rdev->mc.vram_is_ddr = true;
5637         tmp = RREG32(MC_ARB_RAMCFG);
5638         if (tmp & CHANSIZE_MASK) {
5639                 chansize = 64;
5640         } else {
5641                 chansize = 32;
5642         }
5643         tmp = RREG32(MC_SHARED_CHMAP);
5644         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5645         case 0:
5646         default:
5647                 numchan = 1;
5648                 break;
5649         case 1:
5650                 numchan = 2;
5651                 break;
5652         case 2:
5653                 numchan = 4;
5654                 break;
5655         case 3:
5656                 numchan = 8;
5657                 break;
5658         case 4:
5659                 numchan = 3;
5660                 break;
5661         case 5:
5662                 numchan = 6;
5663                 break;
5664         case 6:
5665                 numchan = 10;
5666                 break;
5667         case 7:
5668                 numchan = 12;
5669                 break;
5670         case 8:
5671                 numchan = 16;
5672                 break;
5673         }
5674         rdev->mc.vram_width = numchan * chansize;
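             /*
              * For example, a NOOFCHAN field of 3 selects 8 channels; with
              * 64-bit channels that reports a 512-bit VRAM interface.
              */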
5675         /* Could aper size report 0? */
5676         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5677         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5678         /* size in MB on CIK */
5679         rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5680         rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5681         rdev->mc.visible_vram_size = rdev->mc.aper_size;
5682         si_vram_gtt_location(rdev, &rdev->mc);
5683         radeon_update_bandwidth_info(rdev);
5684
5685         return 0;
5686 }
5687
5688 /*
5689  * GART
5690  * VMID 0 is the physical GPU address space used by the kernel.
5691  * VMIDs 1-15 are used for userspace clients and are handled
5692  * by the radeon vm/hsa code.
5693  */
5694 /**
5695  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5696  *
5697  * @rdev: radeon_device pointer
5698  *
5699  * Flush the TLB for the VMID 0 page table (CIK).
5700  */
5701 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5702 {
5703         /* flush hdp cache */
5704         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5705
5706         /* bits 0-15 are the VM contexts0-15 */
5707         WREG32(VM_INVALIDATE_REQUEST, 0x1);
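             /*
              * Each bit selects one VM context, so 0x1 invalidates only
              * VMID 0; writing 0xffff would flush all 16 contexts.
              */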
5708 }
5709
5710 /**
5711  * cik_pcie_gart_enable - gart enable
5712  *
5713  * @rdev: radeon_device pointer
5714  *
5715  * This sets up the TLBs, programs the page tables for VMID0,
5716  * sets up the hw for VMIDs 1-15 which are allocated on
5717  * demand, and sets up the global locations for the LDS, GDS,
5718  * and GPUVM for FSA64 clients (CIK).
5719  * Returns 0 for success, errors for failure.
5720  */
5721 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5722 {
5723         int r, i;
5724
5725         if (rdev->gart.robj == NULL) {
5726                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5727                 return -EINVAL;
5728         }
5729         r = radeon_gart_table_vram_pin(rdev);
5730         if (r)
5731                 return r;
5732         /* Setup TLB control */
5733         WREG32(MC_VM_MX_L1_TLB_CNTL,
5734                (0xA << 7) |
5735                ENABLE_L1_TLB |
5736                ENABLE_L1_FRAGMENT_PROCESSING |
5737                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5738                ENABLE_ADVANCED_DRIVER_MODEL |
5739                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5740         /* Setup L2 cache */
5741         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5742                ENABLE_L2_FRAGMENT_PROCESSING |
5743                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5744                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5745                EFFECTIVE_L2_QUEUE_SIZE(7) |
5746                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5747         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5748         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5749                BANK_SELECT(4) |
5750                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5751         /* setup context0 */
5752         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5753         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5754         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5755         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5756                         (u32)(rdev->dummy_page.addr >> 12));
5757         WREG32(VM_CONTEXT0_CNTL2, 0);
5758         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5759                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5760
5761         WREG32(0x15D4, 0);
5762         WREG32(0x15D8, 0);
5763         WREG32(0x15DC, 0);
5764
5765         /* restore context1-15 */
5766         /* set vm size, must be a multiple of 4 */
5767         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5768         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5769         for (i = 1; i < 16; i++) {
5770                 if (i < 8)
5771                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5772                                rdev->vm_manager.saved_table_addr[i]);
5773                 else
5774                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5775                                rdev->vm_manager.saved_table_addr[i]);
5776         }
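             /*
              * The per-VMID page table base registers are 32 bits apart,
              * hence the (i << 2) byte offset; VMIDs 8-15 index from
              * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR, e.g. i = 9 lands at
              * VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + 4.
              */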
5777
5778         /* enable context1-15 */
5779         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5780                (u32)(rdev->dummy_page.addr >> 12));
5781         WREG32(VM_CONTEXT1_CNTL2, 4);
5782         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5783                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5784                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5785                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5786                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5787                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5788                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5789                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5790                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5791                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5792                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5793                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5794                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5795                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5796
5797         if (rdev->family == CHIP_KAVERI) {
5798                 u32 tmp = RREG32(CHUB_CONTROL);
5799                 tmp &= ~BYPASS_VM;
5800                 WREG32(CHUB_CONTROL, tmp);
5801         }
5802
5803         /* XXX SH_MEM regs */
5804         /* where to put LDS, scratch, GPUVM in FSA64 space */
5805         mutex_lock(&rdev->srbm_mutex);
5806         for (i = 0; i < 16; i++) {
5807                 cik_srbm_select(rdev, 0, 0, 0, i);
5808                 /* CP and shaders */
5809                 WREG32(SH_MEM_CONFIG, 0);
5810                 WREG32(SH_MEM_APE1_BASE, 1);
5811                 WREG32(SH_MEM_APE1_LIMIT, 0);
5812                 WREG32(SH_MEM_BASES, 0);
5813                 /* SDMA GFX */
5814                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5815                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5816                 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5817                 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5818                 /* XXX SDMA RLC - todo */
5819         }
5820         cik_srbm_select(rdev, 0, 0, 0, 0);
5821         mutex_unlock(&rdev->srbm_mutex);
5822
5823         cik_pcie_gart_tlb_flush(rdev);
5824         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5825                  (unsigned)(rdev->mc.gtt_size >> 20),
5826                  (unsigned long long)rdev->gart.table_addr);
5827         rdev->gart.ready = true;
5828         return 0;
5829 }
5830
5831 /**
5832  * cik_pcie_gart_disable - gart disable
5833  *
5834  * @rdev: radeon_device pointer
5835  *
5836  * This disables all VM page tables (CIK).
5837  */
5838 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5839 {
5840         unsigned i;
5841
5842         for (i = 1; i < 16; ++i) {
5843                 uint32_t reg;
5844                 if (i < 8)
5845                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5846                 else
5847                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5848                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5849         }
5850
5851         /* Disable all tables */
5852         WREG32(VM_CONTEXT0_CNTL, 0);
5853         WREG32(VM_CONTEXT1_CNTL, 0);
5854         /* Setup TLB control */
5855         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5856                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5857         /* Setup L2 cache */
5858         WREG32(VM_L2_CNTL,
5859                ENABLE_L2_FRAGMENT_PROCESSING |
5860                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5861                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5862                EFFECTIVE_L2_QUEUE_SIZE(7) |
5863                CONTEXT1_IDENTITY_ACCESS_MODE(1));
5864         WREG32(VM_L2_CNTL2, 0);
5865         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5866                L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5867         radeon_gart_table_vram_unpin(rdev);
5868 }
5869
5870 /**
5871  * cik_pcie_gart_fini - vm fini callback
5872  *
5873  * @rdev: radeon_device pointer
5874  *
5875  * Tears down the driver GART/VM setup (CIK).
5876  */
5877 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5878 {
5879         cik_pcie_gart_disable(rdev);
5880         radeon_gart_table_vram_free(rdev);
5881         radeon_gart_fini(rdev);
5882 }
5883
5884 /* vm parser */
5885 /**
5886  * cik_ib_parse - vm ib_parse callback
5887  *
5888  * @rdev: radeon_device pointer
5889  * @ib: indirect buffer pointer
5890  *
5891  * CIK uses hw IB checking so this is a nop (CIK).
5892  */
5893 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5894 {
5895         return 0;
5896 }
5897
5898 /*
5899  * vm
5900  * VMID 0 is the physical GPU address space used by the kernel.
5901  * VMIDs 1-15 are used for userspace clients and are handled
5902  * by the radeon vm/hsa code.
5903  */
5904 /**
5905  * cik_vm_init - cik vm init callback
5906  *
5907  * @rdev: radeon_device pointer
5908  *
5909  * Inits cik specific vm parameters (number of VMs, base of vram for
5910  * VMIDs 1-15) (CIK).
5911  * Returns 0 for success.
5912  */
5913 int cik_vm_init(struct radeon_device *rdev)
5914 {
5915         /*
5916          * number of VMs
5917          * VMID 0 is reserved for System
5918          * radeon graphics/compute will use VMIDs 1-7
5919          * amdkfd will use VMIDs 8-15
5920          */
5921         rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
5922         /* base offset of vram pages */
5923         if (rdev->flags & RADEON_IS_IGP) {
5924                 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5925                 tmp <<= 22;
5926                 rdev->vm_manager.vram_base_offset = tmp;
5927         } else
5928                 rdev->vm_manager.vram_base_offset = 0;
5929
5930         return 0;
5931 }
5932
5933 /**
5934  * cik_vm_fini - cik vm fini callback
5935  *
5936  * @rdev: radeon_device pointer
5937  *
5938  * Tear down any asic specific VM setup (CIK).
5939  */
5940 void cik_vm_fini(struct radeon_device *rdev)
5941 {
5942 }
5943
5944 /**
5945  * cik_vm_decode_fault - print human readable fault info
5946  *
5947  * @rdev: radeon_device pointer
5948  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5949  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5950  *
5951  * Print human readable fault information (CIK).
5952  */
5953 static void cik_vm_decode_fault(struct radeon_device *rdev,
5954                                 u32 status, u32 addr, u32 mc_client)
5955 {
5956         u32 mc_id;
5957         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5958         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5959         char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5960                 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
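             /*
              * mc_client packs a four-character ASCII tag, one byte per
              * character, most significant byte first; e.g. 0x43423000
              * decodes to the string "CB0".
              */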
5961
5962         if (rdev->family == CHIP_HAWAII)
5963                 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5964         else
5965                 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5966
5967         printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5968                protections, vmid, addr,
5969                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5970                block, mc_client, mc_id);
5971 }
5972
5973 /**
5974  * cik_vm_flush - cik vm flush using the CP
5975  *
5976  * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: VMID of the page table to flush
 * @pd_addr: address of the page directory
5977  *
5978  * Update the page table base and flush the VM TLB
5979  * using the CP (CIK).
5980  */
5981 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5982                   unsigned vm_id, uint64_t pd_addr)
5983 {
5984         int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5985
5986         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5987         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5988                                  WRITE_DATA_DST_SEL(0)));
5989         if (vm_id < 8) {
5990                 radeon_ring_write(ring,
5991                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5992         } else {
5993                 radeon_ring_write(ring,
5994                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5995         }
5996         radeon_ring_write(ring, 0);
5997         radeon_ring_write(ring, pd_addr >> 12);
5998
5999         /* update SH_MEM_* regs */
6000         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6001         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6002                                  WRITE_DATA_DST_SEL(0)));
6003         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6004         radeon_ring_write(ring, 0);
6005         radeon_ring_write(ring, VMID(vm_id));
6006
6007         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6008         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6009                                  WRITE_DATA_DST_SEL(0)));
6010         radeon_ring_write(ring, SH_MEM_BASES >> 2);
6011         radeon_ring_write(ring, 0);
6012
6013         radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6014         radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6015         radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6016         radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6017
6018         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6019         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6020                                  WRITE_DATA_DST_SEL(0)));
6021         radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6022         radeon_ring_write(ring, 0);
6023         radeon_ring_write(ring, VMID(0));
6024
6025         /* HDP flush */
6026         cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6027
6028         /* bits 0-15 are the VM contexts0-15 */
6029         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6030         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6031                                  WRITE_DATA_DST_SEL(0)));
6032         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6033         radeon_ring_write(ring, 0);
6034         radeon_ring_write(ring, 1 << vm_id);
6035
6036         /* compute doesn't have PFP */
6037         if (usepfp) {
6038                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6039                 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6040                 radeon_ring_write(ring, 0x0);
6041         }
6042 }
6043
6044 /*
6045  * RLC
6046  * The RLC is a multi-purpose microengine that handles a
6047  * variety of functions, the most important of which is
6048  * the interrupt controller.
6049  */
6050 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6051                                           bool enable)
6052 {
6053         u32 tmp = RREG32(CP_INT_CNTL_RING0);
6054
6055         if (enable)
6056                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6057         else
6058                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6059         WREG32(CP_INT_CNTL_RING0, tmp);
6060 }
6061
6062 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6063 {
6064         u32 tmp;
6065
6066         tmp = RREG32(RLC_LB_CNTL);
6067         if (enable)
6068                 tmp |= LOAD_BALANCE_ENABLE;
6069         else
6070                 tmp &= ~LOAD_BALANCE_ENABLE;
6071         WREG32(RLC_LB_CNTL, tmp);
6072 }
6073
6074 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6075 {
6076         u32 i, j, k;
6077         u32 mask;
6078
6079         mutex_lock(&rdev->grbm_idx_mutex);
6080         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6081                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6082                         cik_select_se_sh(rdev, i, j);
6083                         for (k = 0; k < rdev->usec_timeout; k++) {
6084                                 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6085                                         break;
6086                                 udelay(1);
6087                         }
6088                 }
6089         }
6090         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6091         mutex_unlock(&rdev->grbm_idx_mutex);
6092
6093         mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6094         for (k = 0; k < rdev->usec_timeout; k++) {
6095                 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6096                         break;
6097                 udelay(1);
6098         }
6099 }
6100
6101 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6102 {
6103         u32 tmp;
6104
6105         tmp = RREG32(RLC_CNTL);
6106         if (tmp != rlc)
6107                 WREG32(RLC_CNTL, rlc);
6108 }
6109
6110 static u32 cik_halt_rlc(struct radeon_device *rdev)
6111 {
6112         u32 data, orig;
6113
6114         orig = data = RREG32(RLC_CNTL);
6115
6116         if (data & RLC_ENABLE) {
6117                 u32 i;
6118
6119                 data &= ~RLC_ENABLE;
6120                 WREG32(RLC_CNTL, data);
6121
6122                 for (i = 0; i < rdev->usec_timeout; i++) {
6123                         if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6124                                 break;
6125                         udelay(1);
6126                 }
6127
6128                 cik_wait_for_rlc_serdes(rdev);
6129         }
6130
6131         return orig;
6132 }
6133
6134 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6135 {
6136         u32 tmp, i, mask;
6137
6138         tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6139         WREG32(RLC_GPR_REG2, tmp);
6140
6141         mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6142         for (i = 0; i < rdev->usec_timeout; i++) {
6143                 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6144                         break;
6145                 udelay(1);
6146         }
6147
6148         for (i = 0; i < rdev->usec_timeout; i++) {
6149                 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6150                         break;
6151                 udelay(1);
6152         }
6153 }
6154
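/**
 * cik_exit_rlc_safe_mode - leave RLC safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Write the EXIT_RLC_SAFE_MODE request to RLC_GPR_REG2 (CIK).
 */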
6155 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6156 {
6157         u32 tmp;
6158
6159         tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6160         WREG32(RLC_GPR_REG2, tmp);
6161 }
6162
6163 /**
6164  * cik_rlc_stop - stop the RLC ME
6165  *
6166  * @rdev: radeon_device pointer
6167  *
6168  * Halt the RLC ME (MicroEngine) (CIK).
6169  */
6170 static void cik_rlc_stop(struct radeon_device *rdev)
6171 {
6172         WREG32(RLC_CNTL, 0);
6173
6174         cik_enable_gui_idle_interrupt(rdev, false);
6175
6176         cik_wait_for_rlc_serdes(rdev);
6177 }
6178
6179 /**
6180  * cik_rlc_start - start the RLC ME
6181  *
6182  * @rdev: radeon_device pointer
6183  *
6184  * Unhalt the RLC ME (MicroEngine) (CIK).
6185  */
6186 static void cik_rlc_start(struct radeon_device *rdev)
6187 {
6188         WREG32(RLC_CNTL, RLC_ENABLE);
6189
6190         cik_enable_gui_idle_interrupt(rdev, true);
6191
6192         udelay(50);
6193 }
6194
6195 /**
6196  * cik_rlc_resume - setup the RLC hw
6197  *
6198  * @rdev: radeon_device pointer
6199  *
6200  * Initialize the RLC registers, load the ucode,
6201  * and start the RLC (CIK).
6202  * Returns 0 for success, -EINVAL if the ucode is not available.
6203  */
6204 static int cik_rlc_resume(struct radeon_device *rdev)
6205 {
6206         u32 i, size, tmp;
6207
6208         if (!rdev->rlc_fw)
6209                 return -EINVAL;
6210
6211         cik_rlc_stop(rdev);
6212
6213         /* disable CG */
6214         tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6215         WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6216
6217         si_rlc_reset(rdev);
6218
6219         cik_init_pg(rdev);
6220
6221         cik_init_cg(rdev);
6222
6223         WREG32(RLC_LB_CNTR_INIT, 0);
6224         WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6225
6226         mutex_lock(&rdev->grbm_idx_mutex);
6227         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6228         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6229         WREG32(RLC_LB_PARAMS, 0x00600408);
6230         WREG32(RLC_LB_CNTL, 0x80000004);
6231         mutex_unlock(&rdev->grbm_idx_mutex);
6232
6233         WREG32(RLC_MC_CNTL, 0);
6234         WREG32(RLC_UCODE_CNTL, 0);
6235
6236         if (rdev->new_fw) {
6237                 const struct rlc_firmware_header_v1_0 *hdr =
6238                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6239                 const __le32 *fw_data = (const __le32 *)
6240                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6241
6242                 radeon_ucode_print_rlc_hdr(&hdr->header);
6243
6244                 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6245                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6246                 for (i = 0; i < size; i++)
6247                         WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6248                 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6249         } else {
6250                 const __be32 *fw_data;
6251
6252                 switch (rdev->family) {
6253                 case CHIP_BONAIRE:
6254                 case CHIP_HAWAII:
6255                 default:
6256                         size = BONAIRE_RLC_UCODE_SIZE;
6257                         break;
6258                 case CHIP_KAVERI:
6259                         size = KV_RLC_UCODE_SIZE;
6260                         break;
6261                 case CHIP_KABINI:
6262                         size = KB_RLC_UCODE_SIZE;
6263                         break;
6264                 case CHIP_MULLINS:
6265                         size = ML_RLC_UCODE_SIZE;
6266                         break;
6267                 }
6268
6269                 fw_data = (const __be32 *)rdev->rlc_fw->data;
6270                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6271                 for (i = 0; i < size; i++)
6272                         WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6273                 WREG32(RLC_GPM_UCODE_ADDR, 0);
6274         }
6275
6276         /* XXX - find out what chips support lbpw */
6277         cik_enable_lbpw(rdev, false);
6278
6279         if (rdev->family == CHIP_BONAIRE)
6280                 WREG32(RLC_DRIVER_DMA_STATUS, 0);
6281
6282         cik_rlc_start(rdev);
6283
6284         return 0;
6285 }
6286
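/**
 * cik_enable_cgcg - toggle coarse grain clock gating for the GFX block
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable CGCG
 *
 * Program the RLC serdes masks and set or clear CGCG_EN/CGLS_EN in
 * RLC_CGCG_CGLS_CTRL, honoring the RADEON_CG_SUPPORT_GFX_CGCG
 * cg flag (CIK).
 */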
6287 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6288 {
6289         u32 data, orig, tmp, tmp2;
6290
6291         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6292
6293         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6294                 cik_enable_gui_idle_interrupt(rdev, true);
6295
6296                 tmp = cik_halt_rlc(rdev);
6297
6298                 mutex_lock(&rdev->grbm_idx_mutex);
6299                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6300                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6301                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6302                 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6303                 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6304                 mutex_unlock(&rdev->grbm_idx_mutex);
6305
6306                 cik_update_rlc(rdev, tmp);
6307
6308                 data |= CGCG_EN | CGLS_EN;
6309         } else {
6310                 cik_enable_gui_idle_interrupt(rdev, false);
6311
6312                 RREG32(CB_CGTT_SCLK_CTRL);
6313                 RREG32(CB_CGTT_SCLK_CTRL);
6314                 RREG32(CB_CGTT_SCLK_CTRL);
6315                 RREG32(CB_CGTT_SCLK_CTRL);
6316
6317                 data &= ~(CGCG_EN | CGLS_EN);
6318         }
6319
6320         if (orig != data)
6321                 WREG32(RLC_CGCG_CGLS_CTRL, data);
6322
6323 }
6324
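/**
 * cik_enable_mgcg - toggle medium grain clock gating for the GFX block
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MGCG
 *
 * Update CP memory light sleep, the MGCG override in
 * RLC_CGTT_MGCG_OVERRIDE, the RLC serdes masks and the CGTS shader
 * clock gating settings according to the relevant cg flags (CIK).
 */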
6325 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6326 {
6327         u32 data, orig, tmp = 0;
6328
6329         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6330                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6331                         if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6332                                 orig = data = RREG32(CP_MEM_SLP_CNTL);
6333                                 data |= CP_MEM_LS_EN;
6334                                 if (orig != data)
6335                                         WREG32(CP_MEM_SLP_CNTL, data);
6336                         }
6337                 }
6338
6339                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6340                 data |= 0x00000001;
6341                 data &= 0xfffffffd;
6342                 if (orig != data)
6343                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6344
6345                 tmp = cik_halt_rlc(rdev);
6346
6347                 mutex_lock(&rdev->grbm_idx_mutex);
6348                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6349                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6350                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6351                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6352                 WREG32(RLC_SERDES_WR_CTRL, data);
6353                 mutex_unlock(&rdev->grbm_idx_mutex);
6354
6355                 cik_update_rlc(rdev, tmp);
6356
6357                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6358                         orig = data = RREG32(CGTS_SM_CTRL_REG);
6359                         data &= ~SM_MODE_MASK;
6360                         data |= SM_MODE(0x2);
6361                         data |= SM_MODE_ENABLE;
6362                         data &= ~CGTS_OVERRIDE;
6363                         if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6364                             (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6365                                 data &= ~CGTS_LS_OVERRIDE;
6366                         data &= ~ON_MONITOR_ADD_MASK;
6367                         data |= ON_MONITOR_ADD_EN;
6368                         data |= ON_MONITOR_ADD(0x96);
6369                         if (orig != data)
6370                                 WREG32(CGTS_SM_CTRL_REG, data);
6371                 }
6372         } else {
6373                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6374                 data |= 0x00000003;
6375                 if (orig != data)
6376                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6377
6378                 data = RREG32(RLC_MEM_SLP_CNTL);
6379                 if (data & RLC_MEM_LS_EN) {
6380                         data &= ~RLC_MEM_LS_EN;
6381                         WREG32(RLC_MEM_SLP_CNTL, data);
6382                 }
6383
6384                 data = RREG32(CP_MEM_SLP_CNTL);
6385                 if (data & CP_MEM_LS_EN) {
6386                         data &= ~CP_MEM_LS_EN;
6387                         WREG32(CP_MEM_SLP_CNTL, data);
6388                 }
6389
6390                 orig = data = RREG32(CGTS_SM_CTRL_REG);
6391                 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6392                 if (orig != data)
6393                         WREG32(CGTS_SM_CTRL_REG, data);
6394
6395                 tmp = cik_halt_rlc(rdev);
6396
6397                 mutex_lock(&rdev->grbm_idx_mutex);
6398                 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6399                 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6400                 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6401                 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6402                 WREG32(RLC_SERDES_WR_CTRL, data);
6403                 mutex_unlock(&rdev->grbm_idx_mutex);
6404
6405                 cik_update_rlc(rdev, tmp);
6406         }
6407 }
6408
6409 static const u32 mc_cg_registers[] =
6410 {
6411         MC_HUB_MISC_HUB_CG,
6412         MC_HUB_MISC_SIP_CG,
6413         MC_HUB_MISC_VM_CG,
6414         MC_XPB_CLK_GAT,
6415         ATC_MISC_CG,
6416         MC_CITF_MISC_WR_CG,
6417         MC_CITF_MISC_RD_CG,
6418         MC_CITF_MISC_VM_CG,
6419         VM_L2_CG,
6420 };
6421
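/**
 * cik_enable_mc_ls - toggle memory controller light sleep
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MC LS
 *
 * Set or clear MC_LS_ENABLE in each of the mc_cg_registers,
 * honoring the RADEON_CG_SUPPORT_MC_LS cg flag (CIK).
 */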
6422 static void cik_enable_mc_ls(struct radeon_device *rdev,
6423                              bool enable)
6424 {
6425         int i;
6426         u32 orig, data;
6427
6428         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6429                 orig = data = RREG32(mc_cg_registers[i]);
6430                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6431                         data |= MC_LS_ENABLE;
6432                 else
6433                         data &= ~MC_LS_ENABLE;
6434                 if (data != orig)
6435                         WREG32(mc_cg_registers[i], data);
6436         }
6437 }
6438
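/**
 * cik_enable_mc_mgcg - toggle memory controller clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable MC MGCG
 *
 * Set or clear MC_CG_ENABLE in each of the mc_cg_registers,
 * honoring the RADEON_CG_SUPPORT_MC_MGCG cg flag (CIK).
 */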
6439 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6440                                bool enable)
6441 {
6442         int i;
6443         u32 orig, data;
6444
6445         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6446                 orig = data = RREG32(mc_cg_registers[i]);
6447                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6448                         data |= MC_CG_ENABLE;
6449                 else
6450                         data &= ~MC_CG_ENABLE;
6451                 if (data != orig)
6452                         WREG32(mc_cg_registers[i], data);
6453         }
6454 }
6455
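/**
 * cik_enable_sdma_mgcg - toggle SDMA clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable SDMA MGCG
 *
 * Program SDMA0_CLK_CTRL for both SDMA engines, honoring the
 * RADEON_CG_SUPPORT_SDMA_MGCG cg flag (CIK).
 */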
6456 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6457                                  bool enable)
6458 {
6459         u32 orig, data;
6460
6461         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6462                 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6463                 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6464         } else {
6465                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6466                 data |= 0xff000000;
6467                 if (data != orig)
6468                         WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6469
6470                 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6471                 data |= 0xff000000;
6472                 if (data != orig)
6473                         WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6474         }
6475 }
6476
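/**
 * cik_enable_sdma_mgls - toggle SDMA memory light sleep
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable SDMA LS
 *
 * Set or clear the light sleep bit in SDMA0_POWER_CNTL for both
 * SDMA engines, honoring the RADEON_CG_SUPPORT_SDMA_LS cg flag (CIK).
 */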
6477 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6478                                  bool enable)
6479 {
6480         u32 orig, data;
6481
6482         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6483                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6484                 data |= 0x100;
6485                 if (orig != data)
6486                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6487
6488                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6489                 data |= 0x100;
6490                 if (orig != data)
6491                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6492         } else {
6493                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6494                 data &= ~0x100;
6495                 if (orig != data)
6496                         WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6497
6498                 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6499                 data &= ~0x100;
6500                 if (orig != data)
6501                         WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6502         }
6503 }
6504
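/**
 * cik_enable_uvd_mgcg - toggle UVD clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable UVD MGCG
 *
 * Program UVD_CGC_MEM_CTRL and the DCM bit in UVD_CGC_CTRL,
 * honoring the RADEON_CG_SUPPORT_UVD_MGCG cg flag (CIK).
 */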
6505 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6506                                 bool enable)
6507 {
6508         u32 orig, data;
6509
6510         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6511                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6512                 data = 0xfff;
6513                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6514
6515                 orig = data = RREG32(UVD_CGC_CTRL);
6516                 data |= DCM;
6517                 if (orig != data)
6518                         WREG32(UVD_CGC_CTRL, data);
6519         } else {
6520                 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6521                 data &= ~0xfff;
6522                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6523
6524                 orig = data = RREG32(UVD_CGC_CTRL);
6525                 data &= ~DCM;
6526                 if (orig != data)
6527                         WREG32(UVD_CGC_CTRL, data);
6528         }
6529 }
6530
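/**
 * cik_enable_bif_mgls - toggle BIF memory light sleep
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable BIF LS
 *
 * Set or clear the memory light sleep enables in PCIE_CNTL2,
 * honoring the RADEON_CG_SUPPORT_BIF_LS cg flag (CIK).
 */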
6531 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6532                                bool enable)
6533 {
6534         u32 orig, data;
6535
6536         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6537
6538         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6539                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6540                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6541         else
6542                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6543                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6544
6545         if (orig != data)
6546                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6547 }
6548
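/**
 * cik_enable_hdp_mgcg - toggle HDP clock gating
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable HDP MGCG
 *
 * Set or clear CLOCK_GATING_DIS in HDP_HOST_PATH_CNTL, honoring
 * the RADEON_CG_SUPPORT_HDP_MGCG cg flag (CIK).
 */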
6549 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6550                                 bool enable)
6551 {
6552         u32 orig, data;
6553
6554         orig = data = RREG32(HDP_HOST_PATH_CNTL);
6555
6556         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6557                 data &= ~CLOCK_GATING_DIS;
6558         else
6559                 data |= CLOCK_GATING_DIS;
6560
6561         if (orig != data)
6562                 WREG32(HDP_HOST_PATH_CNTL, data);
6563 }
6564
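/**
 * cik_enable_hdp_ls - toggle HDP memory light sleep
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable HDP LS
 *
 * Set or clear HDP_LS_ENABLE in HDP_MEM_POWER_LS, honoring the
 * RADEON_CG_SUPPORT_HDP_LS cg flag (CIK).
 */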
6565 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6566                               bool enable)
6567 {
6568         u32 orig, data;
6569
6570         orig = data = RREG32(HDP_MEM_POWER_LS);
6571
6572         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6573                 data |= HDP_LS_ENABLE;
6574         else
6575                 data &= ~HDP_LS_ENABLE;
6576
6577         if (orig != data)
6578                 WREG32(HDP_MEM_POWER_LS, data);
6579 }
6580
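/**
 * cik_update_cg - toggle clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Enable or disable clock gating for the GFX, MC, SDMA, BIF, UVD,
 * HDP and VCE blocks selected in @block (CIK).
 */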
6581 void cik_update_cg(struct radeon_device *rdev,
6582                    u32 block, bool enable)
6583 {
6584
6585         if (block & RADEON_CG_BLOCK_GFX) {
6586                 cik_enable_gui_idle_interrupt(rdev, false);
6587                 /* order matters! */
6588                 if (enable) {
6589                         cik_enable_mgcg(rdev, true);
6590                         cik_enable_cgcg(rdev, true);
6591                 } else {
6592                         cik_enable_cgcg(rdev, false);
6593                         cik_enable_mgcg(rdev, false);
6594                 }
6595                 cik_enable_gui_idle_interrupt(rdev, true);
6596         }
6597
6598         if (block & RADEON_CG_BLOCK_MC) {
6599                 if (!(rdev->flags & RADEON_IS_IGP)) {
6600                         cik_enable_mc_mgcg(rdev, enable);
6601                         cik_enable_mc_ls(rdev, enable);
6602                 }
6603         }
6604
6605         if (block & RADEON_CG_BLOCK_SDMA) {
6606                 cik_enable_sdma_mgcg(rdev, enable);
6607                 cik_enable_sdma_mgls(rdev, enable);
6608         }
6609
6610         if (block & RADEON_CG_BLOCK_BIF) {
6611                 cik_enable_bif_mgls(rdev, enable);
6612         }
6613
6614         if (block & RADEON_CG_BLOCK_UVD) {
6615                 if (rdev->has_uvd)
6616                         cik_enable_uvd_mgcg(rdev, enable);
6617         }
6618
6619         if (block & RADEON_CG_BLOCK_HDP) {
6620                 cik_enable_hdp_mgcg(rdev, enable);
6621                 cik_enable_hdp_ls(rdev, enable);
6622         }
6623
6624         if (block & RADEON_CG_BLOCK_VCE) {
6625                 vce_v2_0_enable_mgcg(rdev, enable);
6626         }
6627 }
6628
6629 static void cik_init_cg(struct radeon_device *rdev)
6630 {
6631
6632         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6633
6634         if (rdev->has_uvd)
6635                 si_init_uvd_internal_cg(rdev);
6636
6637         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6638                              RADEON_CG_BLOCK_SDMA |
6639                              RADEON_CG_BLOCK_BIF |
6640                              RADEON_CG_BLOCK_UVD |
6641                              RADEON_CG_BLOCK_HDP), true);
6642 }
6643
6644 static void cik_fini_cg(struct radeon_device *rdev)
6645 {
6646         cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6647                              RADEON_CG_BLOCK_SDMA |
6648                              RADEON_CG_BLOCK_BIF |
6649                              RADEON_CG_BLOCK_UVD |
6650                              RADEON_CG_BLOCK_HDP), false);
6651
6652         cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6653 }
6654
6655 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6656                                           bool enable)
6657 {
6658         u32 data, orig;
6659
6660         orig = data = RREG32(RLC_PG_CNTL);
6661         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6662                 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6663         else
6664                 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6665         if (orig != data)
6666                 WREG32(RLC_PG_CNTL, data);
6667 }
6668
6669 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6670                                           bool enable)
6671 {
6672         u32 data, orig;
6673
6674         orig = data = RREG32(RLC_PG_CNTL);
6675         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6676                 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6677         else
6678                 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6679         if (orig != data)
6680                 WREG32(RLC_PG_CNTL, data);
6681 }
6682
6683 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6684 {
6685         u32 data, orig;
6686
6687         orig = data = RREG32(RLC_PG_CNTL);
6688         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6689                 data &= ~DISABLE_CP_PG;
6690         else
6691                 data |= DISABLE_CP_PG;
6692         if (orig != data)
6693                 WREG32(RLC_PG_CNTL, data);
6694 }
6695
6696 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6697 {
6698         u32 data, orig;
6699
6700         orig = data = RREG32(RLC_PG_CNTL);
6701         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6702                 data &= ~DISABLE_GDS_PG;
6703         else
6704                 data |= DISABLE_GDS_PG;
6705         if (orig != data)
6706                 WREG32(RLC_PG_CNTL, data);
6707 }
6708
6709 #define CP_ME_TABLE_SIZE    96
6710 #define CP_ME_TABLE_OFFSET  2048
6711 #define CP_MEC_TABLE_OFFSET 4096
6712
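/**
 * cik_init_cp_pg_table - populate the CP jump tables for powergating
 *
 * @rdev: radeon_device pointer
 *
 * Copy the CE, PFP, ME and MEC jump tables from the CP microcode
 * images into the RLC cp_table buffer (CIK).
 */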
6713 void cik_init_cp_pg_table(struct radeon_device *rdev)
6714 {
6715         volatile u32 *dst_ptr;
6716         int me, i, max_me = 4;
6717         u32 bo_offset = 0;
6718         u32 table_offset, table_size;
6719
6720         if (rdev->family == CHIP_KAVERI)
6721                 max_me = 5;
6722
6723         if (rdev->rlc.cp_table_ptr == NULL)
6724                 return;
6725
6726         /* write the cp table buffer */
6727         dst_ptr = rdev->rlc.cp_table_ptr;
6728         for (me = 0; me < max_me; me++) {
6729                 if (rdev->new_fw) {
6730                         const __le32 *fw_data;
6731                         const struct gfx_firmware_header_v1_0 *hdr;
6732
6733                         if (me == 0) {
6734                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6735                                 fw_data = (const __le32 *)
6736                                         (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6737                                 table_offset = le32_to_cpu(hdr->jt_offset);
6738                                 table_size = le32_to_cpu(hdr->jt_size);
6739                         } else if (me == 1) {
6740                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6741                                 fw_data = (const __le32 *)
6742                                         (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6743                                 table_offset = le32_to_cpu(hdr->jt_offset);
6744                                 table_size = le32_to_cpu(hdr->jt_size);
6745                         } else if (me == 2) {
6746                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6747                                 fw_data = (const __le32 *)
6748                                         (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6749                                 table_offset = le32_to_cpu(hdr->jt_offset);
6750                                 table_size = le32_to_cpu(hdr->jt_size);
6751                         } else if (me == 3) {
6752                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6753                                 fw_data = (const __le32 *)
6754                                         (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6755                                 table_offset = le32_to_cpu(hdr->jt_offset);
6756                                 table_size = le32_to_cpu(hdr->jt_size);
6757                         } else {
6758                                 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6759                                 fw_data = (const __le32 *)
6760                                         (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6761                                 table_offset = le32_to_cpu(hdr->jt_offset);
6762                                 table_size = le32_to_cpu(hdr->jt_size);
6763                         }
6764
6765                         for (i = 0; i < table_size; i ++) {
6766                                 dst_ptr[bo_offset + i] =
6767                                         cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6768                         }
6769                         bo_offset += table_size;
6770                 } else {
6771                         const __be32 *fw_data;
6772                         table_size = CP_ME_TABLE_SIZE;
6773
6774                         if (me == 0) {
6775                                 fw_data = (const __be32 *)rdev->ce_fw->data;
6776                                 table_offset = CP_ME_TABLE_OFFSET;
6777                         } else if (me == 1) {
6778                                 fw_data = (const __be32 *)rdev->pfp_fw->data;
6779                                 table_offset = CP_ME_TABLE_OFFSET;
6780                         } else if (me == 2) {
6781                                 fw_data = (const __be32 *)rdev->me_fw->data;
6782                                 table_offset = CP_ME_TABLE_OFFSET;
6783                         } else {
6784                                 fw_data = (const __be32 *)rdev->mec_fw->data;
6785                                 table_offset = CP_MEC_TABLE_OFFSET;
6786                         }
6787
6788                         for (i = 0; i < table_size; i ++) {
6789                                 dst_ptr[bo_offset + i] =
6790                                         cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6791                         }
6792                         bo_offset += table_size;
6793                 }
6794         }
6795 }
6796
6797 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6798                                 bool enable)
6799 {
6800         u32 data, orig;
6801
6802         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6803                 orig = data = RREG32(RLC_PG_CNTL);
6804                 data |= GFX_PG_ENABLE;
6805                 if (orig != data)
6806                         WREG32(RLC_PG_CNTL, data);
6807
6808                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6809                 data |= AUTO_PG_EN;
6810                 if (orig != data)
6811                         WREG32(RLC_AUTO_PG_CTRL, data);
6812         } else {
6813                 orig = data = RREG32(RLC_PG_CNTL);
6814                 data &= ~GFX_PG_ENABLE;
6815                 if (orig != data)
6816                         WREG32(RLC_PG_CNTL, data);
6817
6818                 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6819                 data &= ~AUTO_PG_EN;
6820                 if (orig != data)
6821                         WREG32(RLC_AUTO_PG_CTRL, data);
6822
6823                 data = RREG32(DB_RENDER_CONTROL);
6824         }
6825 }
6826
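/**
 * cik_get_cu_active_bitmap - fetch the active CU mask for a SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array to query
 *
 * Read the CU disable bits for the selected SE/SH and return a
 * bitmap of the enabled CUs (CIK).
 */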
6827 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6828 {
6829         u32 mask = 0, tmp, tmp1;
6830         int i;
6831
6832         mutex_lock(&rdev->grbm_idx_mutex);
6833         cik_select_se_sh(rdev, se, sh);
6834         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6835         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6836         cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6837         mutex_unlock(&rdev->grbm_idx_mutex);
6838
6839         tmp &= 0xffff0000;
6840
6841         tmp |= tmp1;
6842         tmp >>= 16;
6843
6844         for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6845                 mask <<= 1;
6846                 mask |= 1;
6847         }
6848
6849         return (~tmp) & mask;
6850 }
6851
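/**
 * cik_init_ao_cu_mask - program the always-on CU mask
 *
 * @rdev: radeon_device pointer
 *
 * For each SE/SH, mark up to two active CUs as always-on, program
 * the combined mask into RLC_PG_AO_CU_MASK and write the total
 * active CU count into RLC_MAX_PG_CU (CIK).
 */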
6852 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6853 {
6854         u32 i, j, k, active_cu_number = 0;
6855         u32 mask, counter, cu_bitmap;
6856         u32 tmp = 0;
6857
6858         for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6859                 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6860                         mask = 1;
6861                         cu_bitmap = 0;
6862                         counter = 0;
6863                         for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6864                                 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6865                                         if (counter < 2)
6866                                                 cu_bitmap |= mask;
6867                                         counter ++;
6868                                 }
6869                                 mask <<= 1;
6870                         }
6871
6872                         active_cu_number += counter;
6873                         tmp |= (cu_bitmap << (i * 16 + j * 8));
6874                 }
6875         }
6876
6877         WREG32(RLC_PG_AO_CU_MASK, tmp);
6878
6879         tmp = RREG32(RLC_MAX_PG_CU);
6880         tmp &= ~MAX_PU_CU_MASK;
6881         tmp |= MAX_PU_CU(active_cu_number);
6882         WREG32(RLC_MAX_PG_CU, tmp);
6883 }
6884
6885 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6886                                        bool enable)
6887 {
6888         u32 data, orig;
6889
6890         orig = data = RREG32(RLC_PG_CNTL);
6891         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6892                 data |= STATIC_PER_CU_PG_ENABLE;
6893         else
6894                 data &= ~STATIC_PER_CU_PG_ENABLE;
6895         if (orig != data)
6896                 WREG32(RLC_PG_CNTL, data);
6897 }
6898
6899 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6900                                         bool enable)
6901 {
6902         u32 data, orig;
6903
6904         orig = data = RREG32(RLC_PG_CNTL);
6905         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6906                 data |= DYN_PER_CU_PG_ENABLE;
6907         else
6908                 data &= ~DYN_PER_CU_PG_ENABLE;
6909         if (orig != data)
6910                 WREG32(RLC_PG_CNTL, data);
6911 }
6912
6913 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6914 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6915
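/**
 * cik_init_gfx_cgpg - set up GFX powergating state
 *
 * @rdev: radeon_device pointer
 *
 * Program the clear state descriptor and register save list into
 * the RLC scratch space, set the save/restore and CP table base
 * addresses, and configure the powergating delays and idle poll
 * count (CIK).
 */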
6916 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6917 {
6918         u32 data, orig;
6919         u32 i;
6920
6921         if (rdev->rlc.cs_data) {
6922                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6923                 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6924                 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6925                 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6926         } else {
6927                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6928                 for (i = 0; i < 3; i++)
6929                         WREG32(RLC_GPM_SCRATCH_DATA, 0);
6930         }
6931         if (rdev->rlc.reg_list) {
6932                 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6933                 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6934                         WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6935         }
6936
6937         orig = data = RREG32(RLC_PG_CNTL);
6938         data |= GFX_PG_SRC;
6939         if (orig != data)
6940                 WREG32(RLC_PG_CNTL, data);
6941
6942         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6943         WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6944
6945         data = RREG32(CP_RB_WPTR_POLL_CNTL);
6946         data &= ~IDLE_POLL_COUNT_MASK;
6947         data |= IDLE_POLL_COUNT(0x60);
6948         WREG32(CP_RB_WPTR_POLL_CNTL, data);
6949
6950         data = 0x10101010;
6951         WREG32(RLC_PG_DELAY, data);
6952
6953         data = RREG32(RLC_PG_DELAY_2);
6954         data &= ~0xff;
6955         data |= 0x3;
6956         WREG32(RLC_PG_DELAY_2, data);
6957
6958         data = RREG32(RLC_AUTO_PG_CTRL);
6959         data &= ~GRBM_REG_SGIT_MASK;
6960         data |= GRBM_REG_SGIT(0x700);
6961         WREG32(RLC_AUTO_PG_CTRL, data);
6962
6963 }
6964
6965 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6966 {
6967         cik_enable_gfx_cgpg(rdev, enable);
6968         cik_enable_gfx_static_mgpg(rdev, enable);
6969         cik_enable_gfx_dynamic_mgpg(rdev, enable);
6970 }
6971
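/**
 * cik_get_csb_size - compute the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state sections and return the number of dwords
 * needed for the clear state buffer, or 0 if there is no clear
 * state data (CIK).
 */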
6972 u32 cik_get_csb_size(struct radeon_device *rdev)
6973 {
6974         u32 count = 0;
6975         const struct cs_section_def *sect = NULL;
6976         const struct cs_extent_def *ext = NULL;
6977
6978         if (rdev->rlc.cs_data == NULL)
6979                 return 0;
6980
6981         /* begin clear state */
6982         count += 2;
6983         /* context control state */
6984         count += 3;
6985
6986         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6987                 for (ext = sect->section; ext->extent != NULL; ++ext) {
6988                         if (sect->id == SECT_CONTEXT)
6989                                 count += 2 + ext->reg_count;
6990                         else
6991                                 return 0;
6992                 }
6993         }
6994         /* pa_sc_raster_config/pa_sc_raster_config1 */
6995         count += 4;
6996         /* end clear state */
6997         count += 2;
6998         /* clear state */
6999         count += 2;
7000
7001         return count;
7002 }
7003
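/**
 * cik_get_csb_buffer - populate the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: pointer to the clear state buffer to fill
 *
 * Emit the PM4 packets that make up the clear state into @buffer,
 * including the asic specific PA_SC_RASTER_CONFIG values (CIK).
 */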
7004 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7005 {
7006         u32 count = 0, i;
7007         const struct cs_section_def *sect = NULL;
7008         const struct cs_extent_def *ext = NULL;
7009
7010         if (rdev->rlc.cs_data == NULL)
7011                 return;
7012         if (buffer == NULL)
7013                 return;
7014
7015         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7016         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7017
7018         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7019         buffer[count++] = cpu_to_le32(0x80000000);
7020         buffer[count++] = cpu_to_le32(0x80000000);
7021
7022         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7023                 for (ext = sect->section; ext->extent != NULL; ++ext) {
7024                         if (sect->id == SECT_CONTEXT) {
7025                                 buffer[count++] =
7026                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7027                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7028                                 for (i = 0; i < ext->reg_count; i++)
7029                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
7030                         } else {
7031                                 return;
7032                         }
7033                 }
7034         }
7035
7036         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7037         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7038         switch (rdev->family) {
7039         case CHIP_BONAIRE:
7040                 buffer[count++] = cpu_to_le32(0x16000012);
7041                 buffer[count++] = cpu_to_le32(0x00000000);
7042                 break;
7043         case CHIP_KAVERI:
7044                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7045                 buffer[count++] = cpu_to_le32(0x00000000);
7046                 break;
7047         case CHIP_KABINI:
7048         case CHIP_MULLINS:
7049                 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7050                 buffer[count++] = cpu_to_le32(0x00000000);
7051                 break;
7052         case CHIP_HAWAII:
7053                 buffer[count++] = cpu_to_le32(0x3a00161a);
7054                 buffer[count++] = cpu_to_le32(0x0000002e);
7055                 break;
7056         default:
7057                 buffer[count++] = cpu_to_le32(0x00000000);
7058                 buffer[count++] = cpu_to_le32(0x00000000);
7059                 break;
7060         }
7061
7062         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7063         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7064
7065         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7066         buffer[count++] = cpu_to_le32(0);
7067 }
7068
7069 static void cik_init_pg(struct radeon_device *rdev)
7070 {
7071         if (rdev->pg_flags) {
7072                 cik_enable_sck_slowdown_on_pu(rdev, true);
7073                 cik_enable_sck_slowdown_on_pd(rdev, true);
7074                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7075                         cik_init_gfx_cgpg(rdev);
7076                         cik_enable_cp_pg(rdev, true);
7077                         cik_enable_gds_pg(rdev, true);
7078                 }
7079                 cik_init_ao_cu_mask(rdev);
7080                 cik_update_gfx_pg(rdev, true);
7081         }
7082 }
7083
7084 static void cik_fini_pg(struct radeon_device *rdev)
7085 {
7086         if (rdev->pg_flags) {
7087                 cik_update_gfx_pg(rdev, false);
7088                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7089                         cik_enable_cp_pg(rdev, false);
7090                         cik_enable_gds_pg(rdev, false);
7091                 }
7092         }
7093 }
7094
7095 /*
7096  * Interrupts
7097  * Starting with r6xx, interrupts are handled via a ring buffer.
7098  * Ring buffers are areas of GPU accessible memory that the GPU
7099  * writes interrupt vectors into and the host reads vectors out of.
7100  * There is a rptr (read pointer) that determines where the
7101  * host is currently reading, and a wptr (write pointer)
7102  * which determines where the GPU has written.  When the
7103  * pointers are equal, the ring is idle.  When the GPU
7104  * writes vectors to the ring buffer, it increments the
7105  * wptr.  When there is an interrupt, the host then starts
7106  * fetching vectors and processing them until the pointers are
7107  * equal again at which point it updates the rptr.
7108  */
7109
7110 /**
7111  * cik_enable_interrupts - Enable the interrupt ring buffer
7112  *
7113  * @rdev: radeon_device pointer
7114  *
7115  * Enable the interrupt ring buffer (CIK).
7116  */
7117 static void cik_enable_interrupts(struct radeon_device *rdev)
7118 {
7119         u32 ih_cntl = RREG32(IH_CNTL);
7120         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7121
7122         ih_cntl |= ENABLE_INTR;
7123         ih_rb_cntl |= IH_RB_ENABLE;
7124         WREG32(IH_CNTL, ih_cntl);
7125         WREG32(IH_RB_CNTL, ih_rb_cntl);
7126         rdev->ih.enabled = true;
7127 }
7128
7129 /**
7130  * cik_disable_interrupts - Disable the interrupt ring buffer
7131  *
7132  * @rdev: radeon_device pointer
7133  *
7134  * Disable the interrupt ring buffer (CIK).
7135  */
7136 static void cik_disable_interrupts(struct radeon_device *rdev)
7137 {
7138         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7139         u32 ih_cntl = RREG32(IH_CNTL);
7140
7141         ih_rb_cntl &= ~IH_RB_ENABLE;
7142         ih_cntl &= ~ENABLE_INTR;
7143         WREG32(IH_RB_CNTL, ih_rb_cntl);
7144         WREG32(IH_CNTL, ih_cntl);
7145         /* set rptr, wptr to 0 */
7146         WREG32(IH_RB_RPTR, 0);
7147         WREG32(IH_RB_WPTR, 0);
7148         rdev->ih.enabled = false;
7149         rdev->ih.rptr = 0;
7150 }
7151
7152 /**
7153  * cik_disable_interrupt_state - Disable all interrupt sources
7154  *
7155  * @rdev: radeon_device pointer
7156  *
7157  * Clear all interrupt enable bits used by the driver (CIK).
7158  */
7159 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7160 {
7161         u32 tmp;
7162
7163         /* gfx ring */
7164         tmp = RREG32(CP_INT_CNTL_RING0) &
7165                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7166         WREG32(CP_INT_CNTL_RING0, tmp);
7167         /* sdma */
7168         tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7169         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7170         tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7171         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7172         /* compute queues */
7173         WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7174         WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7175         WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7176         WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7177         WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7178         WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7179         WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7180         WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7181         /* grbm */
7182         WREG32(GRBM_INT_CNTL, 0);
7183         /* vline/vblank, etc. */
7184         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7185         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7186         if (rdev->num_crtc >= 4) {
7187                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7188                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7189         }
7190         if (rdev->num_crtc >= 6) {
7191                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7192                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7193         }
7194         /* pflip */
7195         if (rdev->num_crtc >= 2) {
7196                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7197                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7198         }
7199         if (rdev->num_crtc >= 4) {
7200                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7201                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7202         }
7203         if (rdev->num_crtc >= 6) {
7204                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7205                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7206         }
7207
7208         /* dac hotplug */
7209         WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7210
7211         /* digital hotplug */
7212         tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7213         WREG32(DC_HPD1_INT_CONTROL, tmp);
7214         tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7215         WREG32(DC_HPD2_INT_CONTROL, tmp);
7216         tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7217         WREG32(DC_HPD3_INT_CONTROL, tmp);
7218         tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7219         WREG32(DC_HPD4_INT_CONTROL, tmp);
7220         tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7221         WREG32(DC_HPD5_INT_CONTROL, tmp);
7222         tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7223         WREG32(DC_HPD6_INT_CONTROL, tmp);
7224
7225 }
7226
7227 /**
7228  * cik_irq_init - init and enable the interrupt ring
7229  *
7230  * @rdev: radeon_device pointer
7231  *
7232  * Allocate a ring buffer for the interrupt controller,
7233  * enable the RLC, disable interrupts, enable the IH
7234  * ring buffer and enable it (CIK).
7235  * Called at device load and resume.
7236  * Returns 0 for success, errors for failure.
7237  */
7238 static int cik_irq_init(struct radeon_device *rdev)
7239 {
7240         int ret = 0;
7241         int rb_bufsz;
7242         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7243
7244         /* allocate ring */
7245         ret = r600_ih_ring_alloc(rdev);
7246         if (ret)
7247                 return ret;
7248
7249         /* disable irqs */
7250         cik_disable_interrupts(rdev);
7251
7252         /* init rlc */
7253         ret = cik_rlc_resume(rdev);
7254         if (ret) {
7255                 r600_ih_ring_fini(rdev);
7256                 return ret;
7257         }
7258
7259         /* setup interrupt control */
7260         /* XXX this should actually be a bus address, not an MC address. same on older asics */
7261         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7262         interrupt_cntl = RREG32(INTERRUPT_CNTL);
7263         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7264          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7265          */
7266         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7267         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7268         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7269         WREG32(INTERRUPT_CNTL, interrupt_cntl);
7270
7271         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7272         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7273
7274         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7275                       IH_WPTR_OVERFLOW_CLEAR |
7276                       (rb_bufsz << 1));
7277
7278         if (rdev->wb.enabled)
7279                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7280
7281         /* set the writeback address whether it's enabled or not */
7282         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7283         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7284
7285         WREG32(IH_RB_CNTL, ih_rb_cntl);
7286
7287         /* set rptr, wptr to 0 */
7288         WREG32(IH_RB_RPTR, 0);
7289         WREG32(IH_RB_WPTR, 0);
7290
7291         /* Default settings for IH_CNTL (disabled at first) */
7292         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7293         /* RPTR_REARM only works if msi's are enabled */
7294         if (rdev->msi_enabled)
7295                 ih_cntl |= RPTR_REARM;
7296         WREG32(IH_CNTL, ih_cntl);
7297
7298         /* force the active interrupt state to all disabled */
7299         cik_disable_interrupt_state(rdev);
7300
7301         pci_set_master(rdev->pdev);
7302
7303         /* enable irqs */
7304         cik_enable_interrupts(rdev);
7305
7306         return ret;
7307 }
7308
7309 /**
7310  * cik_irq_set - enable/disable interrupt sources
7311  *
7312  * @rdev: radeon_device pointer
7313  *
7314  * Enable interrupt sources on the GPU (vblanks, hpd,
7315  * etc.) (CIK).
7316  * Returns 0 for success, errors for failure.
7317  */
7318 int cik_irq_set(struct radeon_device *rdev)
7319 {
7320         u32 cp_int_cntl;
7321         u32 cp_m1p0;
7322         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7323         u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7324         u32 grbm_int_cntl = 0;
7325         u32 dma_cntl, dma_cntl1;
7326         u32 thermal_int;
7327
7328         if (!rdev->irq.installed) {
7329                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7330                 return -EINVAL;
7331         }
7332         /* don't enable anything if the ih is disabled */
7333         if (!rdev->ih.enabled) {
7334                 cik_disable_interrupts(rdev);
7335                 /* force the active interrupt state to all disabled */
7336                 cik_disable_interrupt_state(rdev);
7337                 return 0;
7338         }
7339
7340         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7341                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7342         cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7343
7344         hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
7345         hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
7346         hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
7347         hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
7348         hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
7349         hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
7350
7351         dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7352         dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7353
7354         cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7355
7356         if (rdev->flags & RADEON_IS_IGP)
7357                 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
7358                         ~(THERM_INTH_MASK | THERM_INTL_MASK);
7359         else
7360                 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
7361                         ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
7362
7363         /* enable CP interrupts on all rings */
7364         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7365                 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7366                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7367         }
7368         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7369                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7370                 DRM_DEBUG("si_irq_set: sw int cp1\n");
7371                 if (ring->me == 1) {
7372                         switch (ring->pipe) {
7373                         case 0:
7374                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7375                                 break;
7376                         default:
7377                                 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7378                                 break;
7379                         }
7380                 } else {
7381                         DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7382                 }
7383         }
7384         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7385                 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7386                 DRM_DEBUG("si_irq_set: sw int cp2\n");
7387                 if (ring->me == 1) {
7388                         switch (ring->pipe) {
7389                         case 0:
7390                                 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7391                                 break;
7392                         default:
7393                                 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7394                                 break;
7395                         }
7396                 } else {
7397                         DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7398                 }
7399         }
7400
7401         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7402                 DRM_DEBUG("cik_irq_set: sw int dma\n");
7403                 dma_cntl |= TRAP_ENABLE;
7404         }
7405
7406         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7407                 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7408                 dma_cntl1 |= TRAP_ENABLE;
7409         }
7410
7411         if (rdev->irq.crtc_vblank_int[0] ||
7412             atomic_read(&rdev->irq.pflip[0])) {
7413                 DRM_DEBUG("cik_irq_set: vblank 0\n");
7414                 crtc1 |= VBLANK_INTERRUPT_MASK;
7415         }
7416         if (rdev->irq.crtc_vblank_int[1] ||
7417             atomic_read(&rdev->irq.pflip[1])) {
7418                 DRM_DEBUG("cik_irq_set: vblank 1\n");
7419                 crtc2 |= VBLANK_INTERRUPT_MASK;
7420         }
7421         if (rdev->irq.crtc_vblank_int[2] ||
7422             atomic_read(&rdev->irq.pflip[2])) {
7423                 DRM_DEBUG("cik_irq_set: vblank 2\n");
7424                 crtc3 |= VBLANK_INTERRUPT_MASK;
7425         }
7426         if (rdev->irq.crtc_vblank_int[3] ||
7427             atomic_read(&rdev->irq.pflip[3])) {
7428                 DRM_DEBUG("cik_irq_set: vblank 3\n");
7429                 crtc4 |= VBLANK_INTERRUPT_MASK;
7430         }
7431         if (rdev->irq.crtc_vblank_int[4] ||
7432             atomic_read(&rdev->irq.pflip[4])) {
7433                 DRM_DEBUG("cik_irq_set: vblank 4\n");
7434                 crtc5 |= VBLANK_INTERRUPT_MASK;
7435         }
7436         if (rdev->irq.crtc_vblank_int[5] ||
7437             atomic_read(&rdev->irq.pflip[5])) {
7438                 DRM_DEBUG("cik_irq_set: vblank 5\n");
7439                 crtc6 |= VBLANK_INTERRUPT_MASK;
7440         }
7441         if (rdev->irq.hpd[0]) {
7442                 DRM_DEBUG("cik_irq_set: hpd 1\n");
7443                 hpd1 |= DC_HPDx_INT_EN;
7444         }
7445         if (rdev->irq.hpd[1]) {
7446                 DRM_DEBUG("cik_irq_set: hpd 2\n");
7447                 hpd2 |= DC_HPDx_INT_EN;
7448         }
7449         if (rdev->irq.hpd[2]) {
7450                 DRM_DEBUG("cik_irq_set: hpd 3\n");
7451                 hpd3 |= DC_HPDx_INT_EN;
7452         }
7453         if (rdev->irq.hpd[3]) {
7454                 DRM_DEBUG("cik_irq_set: hpd 4\n");
7455                 hpd4 |= DC_HPDx_INT_EN;
7456         }
7457         if (rdev->irq.hpd[4]) {
7458                 DRM_DEBUG("cik_irq_set: hpd 5\n");
7459                 hpd5 |= DC_HPDx_INT_EN;
7460         }
7461         if (rdev->irq.hpd[5]) {
7462                 DRM_DEBUG("cik_irq_set: hpd 6\n");
7463                 hpd6 |= DC_HPDx_INT_EN;
7464         }
7465
7466         if (rdev->irq.dpm_thermal) {
7467                 DRM_DEBUG("dpm thermal\n");
7468                 if (rdev->flags & RADEON_IS_IGP)
7469                         thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7470                 else
7471                         thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7472         }
7473
7474         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7475
7476         WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7477         WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7478
7479         WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7480
7481         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7482
7483         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7484         WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7485         if (rdev->num_crtc >= 4) {
7486                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7487                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7488         }
7489         if (rdev->num_crtc >= 6) {
7490                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7491                 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7492         }
7493
7494         if (rdev->num_crtc >= 2) {
7495                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7496                        GRPH_PFLIP_INT_MASK);
7497                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7498                        GRPH_PFLIP_INT_MASK);
7499         }
7500         if (rdev->num_crtc >= 4) {
7501                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7502                        GRPH_PFLIP_INT_MASK);
7503                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7504                        GRPH_PFLIP_INT_MASK);
7505         }
7506         if (rdev->num_crtc >= 6) {
7507                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7508                        GRPH_PFLIP_INT_MASK);
7509                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7510                        GRPH_PFLIP_INT_MASK);
7511         }
7512
7513         WREG32(DC_HPD1_INT_CONTROL, hpd1);
7514         WREG32(DC_HPD2_INT_CONTROL, hpd2);
7515         WREG32(DC_HPD3_INT_CONTROL, hpd3);
7516         WREG32(DC_HPD4_INT_CONTROL, hpd4);
7517         WREG32(DC_HPD5_INT_CONTROL, hpd5);
7518         WREG32(DC_HPD6_INT_CONTROL, hpd6);
7519
7520         if (rdev->flags & RADEON_IS_IGP)
7521                 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7522         else
7523                 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7524
7525         return 0;
7526 }
7527
7528 /**
7529  * cik_irq_ack - ack interrupt sources
7530  *
7531  * @rdev: radeon_device pointer
7532  *
7533  * Ack interrupt sources on the GPU (vblanks, hpd,
7534  * etc.) (CIK).  Certain interrupt sources are sw
7535  * generated and do not require an explicit ack.
7536  */
7537 static inline void cik_irq_ack(struct radeon_device *rdev)
7538 {
7539         u32 tmp;
7540
7541         rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7542         rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7543         rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7544         rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7545         rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7546         rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7547         rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7548
7549         rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7550                 EVERGREEN_CRTC0_REGISTER_OFFSET);
7551         rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7552                 EVERGREEN_CRTC1_REGISTER_OFFSET);
7553         if (rdev->num_crtc >= 4) {
7554                 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7555                         EVERGREEN_CRTC2_REGISTER_OFFSET);
7556                 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7557                         EVERGREEN_CRTC3_REGISTER_OFFSET);
7558         }
7559         if (rdev->num_crtc >= 6) {
7560                 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7561                         EVERGREEN_CRTC4_REGISTER_OFFSET);
7562                 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7563                         EVERGREEN_CRTC5_REGISTER_OFFSET);
7564         }
7565
7566         if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7567                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7568                        GRPH_PFLIP_INT_CLEAR);
7569         if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7570                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7571                        GRPH_PFLIP_INT_CLEAR);
7572         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7573                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7574         if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7575                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7576         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7577                 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7578         if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7579                 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7580
7581         if (rdev->num_crtc >= 4) {
7582                 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7583                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7584                                GRPH_PFLIP_INT_CLEAR);
7585                 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7586                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7587                                GRPH_PFLIP_INT_CLEAR);
7588                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7589                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7590                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7591                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7592                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7593                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7594                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7595                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7596         }
7597
7598         if (rdev->num_crtc >= 6) {
7599                 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7600                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7601                                GRPH_PFLIP_INT_CLEAR);
7602                 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7603                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7604                                GRPH_PFLIP_INT_CLEAR);
7605                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7606                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7607                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7608                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7609                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7610                         WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7611                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7612                         WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7613         }
7614
7615         if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7616                 tmp = RREG32(DC_HPD1_INT_CONTROL);
7617                 tmp |= DC_HPDx_INT_ACK;
7618                 WREG32(DC_HPD1_INT_CONTROL, tmp);
7619         }
7620         if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7621                 tmp = RREG32(DC_HPD2_INT_CONTROL);
7622                 tmp |= DC_HPDx_INT_ACK;
7623                 WREG32(DC_HPD2_INT_CONTROL, tmp);
7624         }
7625         if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7626                 tmp = RREG32(DC_HPD3_INT_CONTROL);
7627                 tmp |= DC_HPDx_INT_ACK;
7628                 WREG32(DC_HPD3_INT_CONTROL, tmp);
7629         }
7630         if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7631                 tmp = RREG32(DC_HPD4_INT_CONTROL);
7632                 tmp |= DC_HPDx_INT_ACK;
7633                 WREG32(DC_HPD4_INT_CONTROL, tmp);
7634         }
7635         if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7636                 tmp = RREG32(DC_HPD5_INT_CONTROL);
7637                 tmp |= DC_HPDx_INT_ACK;
7638                 WREG32(DC_HPD5_INT_CONTROL, tmp);
7639         }
7640         if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7641                 tmp = RREG32(DC_HPD6_INT_CONTROL);
7642                 tmp |= DC_HPDx_INT_ACK;
7643                 WREG32(DC_HPD6_INT_CONTROL, tmp);
7644         }
7645 }
7646
7647 /**
7648  * cik_irq_disable - disable interrupts
7649  *
7650  * @rdev: radeon_device pointer
7651  *
7652  * Disable interrupts on the hw (CIK).
7653  */
7654 static void cik_irq_disable(struct radeon_device *rdev)
7655 {
7656         cik_disable_interrupts(rdev);
7657         /* Wait and acknowledge irq */
7658         mdelay(1);
7659         cik_irq_ack(rdev);
7660         cik_disable_interrupt_state(rdev);
7661 }
7662
7663 /**
7664  * cik_irq_suspend - disable interrupts for suspend
7665  *
7666  * @rdev: radeon_device pointer
7667  *
7668  * Disable interrupts and stop the RLC (CIK).
7669  * Used for suspend.
7670  */
7671 static void cik_irq_suspend(struct radeon_device *rdev)
7672 {
7673         cik_irq_disable(rdev);
7674         cik_rlc_stop(rdev);
7675 }
7676
7677 /**
7678  * cik_irq_fini - tear down interrupt support
7679  *
7680  * @rdev: radeon_device pointer
7681  *
7682  * Disable interrupts on the hw and free the IH ring
7683  * buffer (CIK).
7684  * Used for driver unload.
7685  */
7686 static void cik_irq_fini(struct radeon_device *rdev)
7687 {
7688         cik_irq_suspend(rdev);
7689         r600_ih_ring_fini(rdev);
7690 }
7691
7692 /**
7693  * cik_get_ih_wptr - get the IH ring buffer wptr
7694  *
7695  * @rdev: radeon_device pointer
7696  *
7697  * Get the IH ring buffer wptr from either the register
7698  * or the writeback memory buffer (CIK).  Also check for
7699  * ring buffer overflow and deal with it.
7700  * Used by cik_irq_process().
7701  * Returns the value of the wptr.
7702  */
7703 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7704 {
7705         u32 wptr, tmp;
7706
7707         if (rdev->wb.enabled)
7708                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7709         else
7710                 wptr = RREG32(IH_RB_WPTR);
7711
7712         if (wptr & RB_OVERFLOW) {
7713                 wptr &= ~RB_OVERFLOW;
7714                 /* When a ring buffer overflow happens, start parsing interrupts
7715                  * from the last vector that was not overwritten (wptr + 16).
7716                  * Hopefully this should allow us to catch up.
7717                  */
7718                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7719                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7720                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7721                 tmp = RREG32(IH_RB_CNTL);
7722                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7723                 WREG32(IH_RB_CNTL, tmp);
7724         }
7725         return (wptr & rdev->ih.ptr_mask);
7726 }
7727
7728 /*        CIK IV Ring
7729  * Each IV ring entry is 128 bits:
7730  * [7:0]    - interrupt source id
7731  * [31:8]   - reserved
7732  * [59:32]  - interrupt source data
7733  * [63:60]  - reserved
7734  * [71:64]  - RINGID
7735  *            CP:
7736  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7737  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7738  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7739  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7740  *            PIPE_ID - ME0 0=3D
7741  *                    - ME1&2 compute dispatcher (4 pipes each)
7742  *            SDMA:
7743  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7744  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7745  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7746  * [79:72]  - VMID
7747  * [95:80]  - PASID
7748  * [127:96] - reserved
7749  */
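/* Illustrative sketch, not part of the driver: decode the RINGID byte of an
 * IV entry into the ME/PIPE/QUEUE fields laid out above.  The masks mirror
 * the open-coded decoding in cik_irq_process() below; the helper name and
 * out-parameters are hypothetical and exist only to clarify the bit layout.
 */
static inline void cik_iv_decode_ringid(u32 ring_id, u8 *me_id,
					u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;    /* RINGID[6:5]: 0 = gfx, 1/2 = compute MEs */
	*pipe_id = (ring_id & 0x18) >> 3;  /* RINGID[4:3]: pipe within the ME */
	*queue_id = (ring_id & 0x7) >> 0;  /* RINGID[2:0]: queue within the pipe */
}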
7750 /**
7751  * cik_irq_process - interrupt handler
7752  *
7753  * @rdev: radeon_device pointer
7754  *
7755  * Interrupt handler (CIK).  Walk the IH ring,
7756  * ack interrupts and schedule work to handle
7757  * interrupt events.
7758  * Returns irq process return code.
7759  */
7760 int cik_irq_process(struct radeon_device *rdev)
7761 {
7762         struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7763         struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7764         u32 wptr;
7765         u32 rptr;
7766         u32 src_id, src_data, ring_id;
7767         u8 me_id, pipe_id, queue_id;
7768         u32 ring_index;
7769         bool queue_hotplug = false;
7770         bool queue_reset = false;
7771         u32 addr, status, mc_client;
7772         bool queue_thermal = false;
7773
7774         if (!rdev->ih.enabled || rdev->shutdown)
7775                 return IRQ_NONE;
7776
7777         wptr = cik_get_ih_wptr(rdev);
7778
7779 restart_ih:
7780         /* is somebody else already processing irqs? */
7781         if (atomic_xchg(&rdev->ih.lock, 1))
7782                 return IRQ_NONE;
7783
7784         rptr = rdev->ih.rptr;
7785         DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7786
7787         /* Order reading of wptr vs. reading of IH ring data */
7788         rmb();
7789
7790         /* display interrupts */
7791         cik_irq_ack(rdev);
7792
7793         while (rptr != wptr) {
7794                 /* wptr/rptr are in bytes! */
7795                 ring_index = rptr / 4;
7796
7797                 radeon_kfd_interrupt(rdev,
7798                                 (const void *) &rdev->ih.ring[ring_index]);
7799
7800                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7801                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7802                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7803
7804                 switch (src_id) {
7805                 case 1: /* D1 vblank/vline */
7806                         switch (src_data) {
7807                         case 0: /* D1 vblank */
7808                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7809                                         if (rdev->irq.crtc_vblank_int[0]) {
7810                                                 drm_handle_vblank(rdev->ddev, 0);
7811                                                 rdev->pm.vblank_sync = true;
7812                                                 wake_up(&rdev->irq.vblank_queue);
7813                                         }
7814                                         if (atomic_read(&rdev->irq.pflip[0]))
7815                                                 radeon_crtc_handle_vblank(rdev, 0);
7816                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7817                                         DRM_DEBUG("IH: D1 vblank\n");
7818                                 }
7819                                 break;
7820                         case 1: /* D1 vline */
7821                                 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7822                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7823                                         DRM_DEBUG("IH: D1 vline\n");
7824                                 }
7825                                 break;
7826                         default:
7827                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7828                                 break;
7829                         }
7830                         break;
7831                 case 2: /* D2 vblank/vline */
7832                         switch (src_data) {
7833                         case 0: /* D2 vblank */
7834                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7835                                         if (rdev->irq.crtc_vblank_int[1]) {
7836                                                 drm_handle_vblank(rdev->ddev, 1);
7837                                                 rdev->pm.vblank_sync = true;
7838                                                 wake_up(&rdev->irq.vblank_queue);
7839                                         }
7840                                         if (atomic_read(&rdev->irq.pflip[1]))
7841                                                 radeon_crtc_handle_vblank(rdev, 1);
7842                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7843                                         DRM_DEBUG("IH: D2 vblank\n");
7844                                 }
7845                                 break;
7846                         case 1: /* D2 vline */
7847                                 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7848                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7849                                         DRM_DEBUG("IH: D2 vline\n");
7850                                 }
7851                                 break;
7852                         default:
7853                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7854                                 break;
7855                         }
7856                         break;
7857                 case 3: /* D3 vblank/vline */
7858                         switch (src_data) {
7859                         case 0: /* D3 vblank */
7860                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7861                                         if (rdev->irq.crtc_vblank_int[2]) {
7862                                                 drm_handle_vblank(rdev->ddev, 2);
7863                                                 rdev->pm.vblank_sync = true;
7864                                                 wake_up(&rdev->irq.vblank_queue);
7865                                         }
7866                                         if (atomic_read(&rdev->irq.pflip[2]))
7867                                                 radeon_crtc_handle_vblank(rdev, 2);
7868                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7869                                         DRM_DEBUG("IH: D3 vblank\n");
7870                                 }
7871                                 break;
7872                         case 1: /* D3 vline */
7873                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7874                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7875                                         DRM_DEBUG("IH: D3 vline\n");
7876                                 }
7877                                 break;
7878                         default:
7879                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7880                                 break;
7881                         }
7882                         break;
7883                 case 4: /* D4 vblank/vline */
7884                         switch (src_data) {
7885                         case 0: /* D4 vblank */
7886                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7887                                         if (rdev->irq.crtc_vblank_int[3]) {
7888                                                 drm_handle_vblank(rdev->ddev, 3);
7889                                                 rdev->pm.vblank_sync = true;
7890                                                 wake_up(&rdev->irq.vblank_queue);
7891                                         }
7892                                         if (atomic_read(&rdev->irq.pflip[3]))
7893                                                 radeon_crtc_handle_vblank(rdev, 3);
7894                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7895                                         DRM_DEBUG("IH: D4 vblank\n");
7896                                 }
7897                                 break;
7898                         case 1: /* D4 vline */
7899                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7900                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7901                                         DRM_DEBUG("IH: D4 vline\n");
7902                                 }
7903                                 break;
7904                         default:
7905                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7906                                 break;
7907                         }
7908                         break;
7909                 case 5: /* D5 vblank/vline */
7910                         switch (src_data) {
7911                         case 0: /* D5 vblank */
7912                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7913                                         if (rdev->irq.crtc_vblank_int[4]) {
7914                                                 drm_handle_vblank(rdev->ddev, 4);
7915                                                 rdev->pm.vblank_sync = true;
7916                                                 wake_up(&rdev->irq.vblank_queue);
7917                                         }
7918                                         if (atomic_read(&rdev->irq.pflip[4]))
7919                                                 radeon_crtc_handle_vblank(rdev, 4);
7920                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7921                                         DRM_DEBUG("IH: D5 vblank\n");
7922                                 }
7923                                 break;
7924                         case 1: /* D5 vline */
7925                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7926                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7927                                         DRM_DEBUG("IH: D5 vline\n");
7928                                 }
7929                                 break;
7930                         default:
7931                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7932                                 break;
7933                         }
7934                         break;
7935                 case 6: /* D6 vblank/vline */
7936                         switch (src_data) {
7937                         case 0: /* D6 vblank */
7938                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7939                                         if (rdev->irq.crtc_vblank_int[5]) {
7940                                                 drm_handle_vblank(rdev->ddev, 5);
7941                                                 rdev->pm.vblank_sync = true;
7942                                                 wake_up(&rdev->irq.vblank_queue);
7943                                         }
7944                                         if (atomic_read(&rdev->irq.pflip[5]))
7945                                                 radeon_crtc_handle_vblank(rdev, 5);
7946                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7947                                         DRM_DEBUG("IH: D6 vblank\n");
7948                                 }
7949                                 break;
7950                         case 1: /* D6 vline */
7951                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7952                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7953                                         DRM_DEBUG("IH: D6 vline\n");
7954                                 }
7955                                 break;
7956                         default:
7957                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7958                                 break;
7959                         }
7960                         break;
7961                 case 8: /* D1 page flip */
7962                 case 10: /* D2 page flip */
7963                 case 12: /* D3 page flip */
7964                 case 14: /* D4 page flip */
7965                 case 16: /* D5 page flip */
7966                 case 18: /* D6 page flip */
7967                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7968                         if (radeon_use_pflipirq > 0)
7969                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7970                         break;
7971                 case 42: /* HPD hotplug */
7972                         switch (src_data) {
7973                         case 0:
7974                                 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7975                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7976                                         queue_hotplug = true;
7977                                         DRM_DEBUG("IH: HPD1\n");
7978                                 }
7979                                 break;
7980                         case 1:
7981                                 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7982                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7983                                         queue_hotplug = true;
7984                                         DRM_DEBUG("IH: HPD2\n");
7985                                 }
7986                                 break;
7987                         case 2:
7988                                 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7989                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7990                                         queue_hotplug = true;
7991                                         DRM_DEBUG("IH: HPD3\n");
7992                                 }
7993                                 break;
7994                         case 3:
7995                                 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7996                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7997                                         queue_hotplug = true;
7998                                         DRM_DEBUG("IH: HPD4\n");
7999                                 }
8000                                 break;
8001                         case 4:
8002                                 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8003                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8004                                         queue_hotplug = true;
8005                                         DRM_DEBUG("IH: HPD5\n");
8006                                 }
8007                                 break;
8008                         case 5:
8009                                 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8010                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8011                                         queue_hotplug = true;
8012                                         DRM_DEBUG("IH: HPD6\n");
8013                                 }
8014                                 break;
8015                         default:
8016                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8017                                 break;
8018                         }
8019                         break;
8020                 case 124: /* UVD */
8021                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8022                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8023                         break;
8024                 case 146:
8025                 case 147:
8026                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8027                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8028                         mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8029                         /* reset addr and status */
8030                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8031                         if (addr == 0x0 && status == 0x0)
8032                                 break;
8033                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8034                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8035                                 addr);
8036                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8037                                 status);
8038                         cik_vm_decode_fault(rdev, status, addr, mc_client);
8039                         break;
8040                 case 167: /* VCE */
8041                         DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8042                         switch (src_data) {
8043                         case 0:
8044                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8045                                 break;
8046                         case 1:
8047                                 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8048                                 break;
8049                         default:
8050                                 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8051                                 break;
8052                         }
8053                         break;
8054                 case 176: /* GFX RB CP_INT */
8055                 case 177: /* GFX IB CP_INT */
8056                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8057                         break;
8058                 case 181: /* CP EOP event */
8059                         DRM_DEBUG("IH: CP EOP\n");
8060                         /* XXX check the bitfield order! */
8061                         me_id = (ring_id & 0x60) >> 5;
8062                         pipe_id = (ring_id & 0x18) >> 3;
8063                         queue_id = (ring_id & 0x7) >> 0;
8064                         switch (me_id) {
8065                         case 0:
8066                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8067                                 break;
8068                         case 1:
8069                         case 2:
8070                         if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8071                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8072                         if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8073                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8074                                 break;
8075                         }
8076                         break;
8077                 case 184: /* CP Privileged reg access */
8078                         DRM_ERROR("Illegal register access in command stream\n");
8079                         /* XXX check the bitfield order! */
8080                         me_id = (ring_id & 0x60) >> 5;
8081                         pipe_id = (ring_id & 0x18) >> 3;
8082                         queue_id = (ring_id & 0x7) >> 0;
8083                         switch (me_id) {
8084                         case 0:
8085                                 /* This results in a full GPU reset, but all we need to do is soft
8086                                  * reset the CP for gfx
8087                                  */
8088                                 queue_reset = true;
8089                                 break;
8090                         case 1:
8091                                 /* XXX compute */
8092                                 queue_reset = true;
8093                                 break;
8094                         case 2:
8095                                 /* XXX compute */
8096                                 queue_reset = true;
8097                                 break;
8098                         }
8099                         break;
8100                 case 185: /* CP Privileged inst */
8101                         DRM_ERROR("Illegal instruction in command stream\n");
8102                         /* XXX check the bitfield order! */
8103                         me_id = (ring_id & 0x60) >> 5;
8104                         pipe_id = (ring_id & 0x18) >> 3;
8105                         queue_id = (ring_id & 0x7) >> 0;
8106                         switch (me_id) {
8107                         case 0:
8108                                 /* This results in a full GPU reset, but all we need to do is soft
8109                                  * reset the CP for gfx
8110                                  */
8111                                 queue_reset = true;
8112                                 break;
8113                         case 1:
8114                                 /* XXX compute */
8115                                 queue_reset = true;
8116                                 break;
8117                         case 2:
8118                                 /* XXX compute */
8119                                 queue_reset = true;
8120                                 break;
8121                         }
8122                         break;
8123                 case 224: /* SDMA trap event */
8124                         /* XXX check the bitfield order! */
8125                         me_id = (ring_id & 0x3) >> 0;
8126                         queue_id = (ring_id & 0xc) >> 2;
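			/* per the IV ring layout above, me_id is the SDMA
			 * INSTANCE_ID (0 = sdma0, 1 = sdma1) and queue_id
			 * selects 0 = gfx, 1 = rlc0, 2 = rlc1 */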
8127                         DRM_DEBUG("IH: SDMA trap\n");
8128                         switch (me_id) {
8129                         case 0:
8130                                 switch (queue_id) {
8131                                 case 0:
8132                                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8133                                         break;
8134                                 case 1:
8135                                         /* XXX compute */
8136                                         break;
8137                                 case 2:
8138                                         /* XXX compute */
8139                                         break;
8140                                 }
8141                                 break;
8142                         case 1:
8143                                 switch (queue_id) {
8144                                 case 0:
8145                                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8146                                         break;
8147                                 case 1:
8148                                         /* XXX compute */
8149                                         break;
8150                                 case 2:
8151                                         /* XXX compute */
8152                                         break;
8153                                 }
8154                                 break;
8155                         }
8156                         break;
8157                 case 230: /* thermal low to high */
8158                         DRM_DEBUG("IH: thermal low to high\n");
8159                         rdev->pm.dpm.thermal.high_to_low = false;
8160                         queue_thermal = true;
8161                         break;
8162                 case 231: /* thermal high to low */
8163                         DRM_DEBUG("IH: thermal high to low\n");
8164                         rdev->pm.dpm.thermal.high_to_low = true;
8165                         queue_thermal = true;
8166                         break;
8167                 case 233: /* GUI IDLE */
8168                         DRM_DEBUG("IH: GUI idle\n");
8169                         break;
8170                 case 241: /* SDMA Privileged inst */
8171                 case 247: /* SDMA Privileged inst */
8172                         DRM_ERROR("Illegal instruction in SDMA command stream\n");
8173                         /* XXX check the bitfield order! */
8174                         me_id = (ring_id & 0x3) >> 0;
8175                         queue_id = (ring_id & 0xc) >> 2;
8176                         switch (me_id) {
8177                         case 0:
8178                                 switch (queue_id) {
8179                                 case 0:
8180                                         queue_reset = true;
8181                                         break;
8182                                 case 1:
8183                                         /* XXX compute */
8184                                         queue_reset = true;
8185                                         break;
8186                                 case 2:
8187                                         /* XXX compute */
8188                                         queue_reset = true;
8189                                         break;
8190                                 }
8191                                 break;
8192                         case 1:
8193                                 switch (queue_id) {
8194                                 case 0:
8195                                         queue_reset = true;
8196                                         break;
8197                                 case 1:
8198                                         /* XXX compute */
8199                                         queue_reset = true;
8200                                         break;
8201                                 case 2:
8202                                         /* XXX compute */
8203                                         queue_reset = true;
8204                                         break;
8205                                 }
8206                                 break;
8207                         }
8208                         break;
8209                 default:
8210                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8211                         break;
8212                 }
8213
8214                 /* wptr/rptr are in bytes! */
8215                 rptr += 16;
8216                 rptr &= rdev->ih.ptr_mask;
8217                 WREG32(IH_RB_RPTR, rptr);
8218         }
8219         if (queue_hotplug)
8220                 schedule_work(&rdev->hotplug_work);
8221         if (queue_reset) {
8222                 rdev->needs_reset = true;
8223                 wake_up_all(&rdev->fence_queue);
8224         }
8225         if (queue_thermal)
8226                 schedule_work(&rdev->pm.dpm.thermal.work);
8227         rdev->ih.rptr = rptr;
8228         atomic_set(&rdev->ih.lock, 0);
8229
8230         /* make sure wptr hasn't changed while processing */
8231         wptr = cik_get_ih_wptr(rdev);
8232         if (wptr != rptr)
8233                 goto restart_ih;
8234
8235         return IRQ_HANDLED;
8236 }
8237
8238 /*
8239  * startup/shutdown callbacks
8240  */
8241 /**
8242  * cik_startup - program the asic to a functional state
8243  *
8244  * @rdev: radeon_device pointer
8245  *
8246  * Programs the asic to a functional state (CIK).
8247  * Called by cik_init() and cik_resume().
8248  * Returns 0 for success, error for failure.
8249  */
8250 static int cik_startup(struct radeon_device *rdev)
8251 {
8252         struct radeon_ring *ring;
8253         u32 nop;
8254         int r;
8255
8256         /* enable pcie gen2/3 link */
8257         cik_pcie_gen3_enable(rdev);
8258         /* enable aspm */
8259         cik_program_aspm(rdev);
8260
8261         /* scratch needs to be initialized before MC */
8262         r = r600_vram_scratch_init(rdev);
8263         if (r)
8264                 return r;
8265
8266         cik_mc_program(rdev);
8267
8268         if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8269                 r = ci_mc_load_microcode(rdev);
8270                 if (r) {
8271                         DRM_ERROR("Failed to load MC firmware!\n");
8272                         return r;
8273                 }
8274         }
8275
8276         r = cik_pcie_gart_enable(rdev);
8277         if (r)
8278                 return r;
8279         cik_gpu_init(rdev);
8280
8281         /* allocate rlc buffers */
8282         if (rdev->flags & RADEON_IS_IGP) {
8283                 if (rdev->family == CHIP_KAVERI) {
8284                         rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8285                         rdev->rlc.reg_list_size =
8286                                 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8287                 } else {
8288                         rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8289                         rdev->rlc.reg_list_size =
8290                                 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8291                 }
8292         }
8293         rdev->rlc.cs_data = ci_cs_data;
8294         rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8295         r = sumo_rlc_init(rdev);
8296         if (r) {
8297                 DRM_ERROR("Failed to init rlc BOs!\n");
8298                 return r;
8299         }
8300
8301         /* allocate wb buffer */
8302         r = radeon_wb_init(rdev);
8303         if (r)
8304                 return r;
8305
8306         /* allocate mec buffers */
8307         r = cik_mec_init(rdev);
8308         if (r) {
8309                 DRM_ERROR("Failed to init MEC BOs!\n");
8310                 return r;
8311         }
8312
8313         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8314         if (r) {
8315                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8316                 return r;
8317         }
8318
8319         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8320         if (r) {
8321                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8322                 return r;
8323         }
8324
8325         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8326         if (r) {
8327                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8328                 return r;
8329         }
8330
8331         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8332         if (r) {
8333                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8334                 return r;
8335         }
8336
8337         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8338         if (r) {
8339                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8340                 return r;
8341         }
8342
8343         r = radeon_uvd_resume(rdev);
8344         if (!r) {
8345                 r = uvd_v4_2_resume(rdev);
8346                 if (!r) {
8347                         r = radeon_fence_driver_start_ring(rdev,
8348                                                            R600_RING_TYPE_UVD_INDEX);
8349                         if (r)
8350                                 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8351                 }
8352         }
8353         if (r)
8354                 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8355
8356         r = radeon_vce_resume(rdev);
8357         if (!r) {
8358                 r = vce_v2_0_resume(rdev);
8359                 if (!r)
8360                         r = radeon_fence_driver_start_ring(rdev,
8361                                                            TN_RING_TYPE_VCE1_INDEX);
8362                 if (!r)
8363                         r = radeon_fence_driver_start_ring(rdev,
8364                                                            TN_RING_TYPE_VCE2_INDEX);
8365         }
8366         if (r) {
8367                 dev_err(rdev->dev, "VCE init error (%d).\n", r);
8368                 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8369                 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8370         }
8371
8372         /* Enable IRQ */
8373         if (!rdev->irq.installed) {
8374                 r = radeon_irq_kms_init(rdev);
8375                 if (r)
8376                         return r;
8377         }
8378
8379         r = cik_irq_init(rdev);
8380         if (r) {
8381                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8382                 radeon_irq_kms_fini(rdev);
8383                 return r;
8384         }
8385         cik_irq_set(rdev);
8386
8387         if (rdev->family == CHIP_HAWAII) {
8388                 if (rdev->new_fw)
8389                         nop = PACKET3(PACKET3_NOP, 0x3FFF);
8390                 else
8391                         nop = RADEON_CP_PACKET2;
8392         } else {
8393                 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8394         }
8395
8396         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8397         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8398                              nop);
8399         if (r)
8400                 return r;
8401
8402         /* set up the compute queues */
8403         /* type-2 packets are deprecated on MEC, use type-3 instead */
8404         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8405         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8406                              nop);
8407         if (r)
8408                 return r;
8409         ring->me = 1; /* first MEC */
8410         ring->pipe = 0; /* first pipe */
8411         ring->queue = 0; /* first queue */
8412         ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8413
8414         /* type-2 packets are deprecated on MEC, use type-3 instead */
8415         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8416         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8417                              nop);
8418         if (r)
8419                 return r;
8420         /* dGPUs only have 1 MEC */
8421         ring->me = 1; /* first MEC */
8422         ring->pipe = 0; /* first pipe */
8423         ring->queue = 1; /* second queue */
8424         ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8425
8426         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8427         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8428                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8429         if (r)
8430                 return r;
8431
8432         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8433         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8434                              SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8435         if (r)
8436                 return r;
8437
8438         r = cik_cp_resume(rdev);
8439         if (r)
8440                 return r;
8441
8442         r = cik_sdma_resume(rdev);
8443         if (r)
8444                 return r;
8445
8446         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8447         if (ring->ring_size) {
8448                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8449                                      RADEON_CP_PACKET2);
8450                 if (!r)
8451                         r = uvd_v1_0_init(rdev);
8452                 if (r)
8453                         DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8454         }
8455
8456         r = -ENOENT;
8457
8458         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8459         if (ring->ring_size)
8460                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8461                                      VCE_CMD_NO_OP);
8462
8463         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8464         if (ring->ring_size)
8465                 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8466                                      VCE_CMD_NO_OP);
8467
8468         if (!r)
8469                 r = vce_v1_0_init(rdev);
8470         else if (r != -ENOENT)
8471                 DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8472
8473         r = radeon_ib_pool_init(rdev);
8474         if (r) {
8475                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8476                 return r;
8477         }
8478
8479         r = radeon_vm_manager_init(rdev);
8480         if (r) {
8481                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8482                 return r;
8483         }
8484
8485         r = dce6_audio_init(rdev);
8486         if (r)
8487                 return r;
8488
8489         r = radeon_kfd_resume(rdev);
8490         if (r)
8491                 return r;
8492
8493         return 0;
8494 }
8495
8496 /**
8497  * cik_resume - resume the asic to a functional state
8498  *
8499  * @rdev: radeon_device pointer
8500  *
8501  * Programs the asic to a functional state (CIK).
8502  * Called at resume.
8503  * Returns 0 for success, error for failure.
8504  */
8505 int cik_resume(struct radeon_device *rdev)
8506 {
8507         int r;
8508
8509         /* post card */
8510         atom_asic_init(rdev->mode_info.atom_context);
8511
8512         /* init golden registers */
8513         cik_init_golden_registers(rdev);
8514
8515         if (rdev->pm.pm_method == PM_METHOD_DPM)
8516                 radeon_pm_resume(rdev);
8517
8518         rdev->accel_working = true;
8519         r = cik_startup(rdev);
8520         if (r) {
8521                 DRM_ERROR("cik startup failed on resume\n");
8522                 rdev->accel_working = false;
8523                 return r;
8524         }
8525
8526         return r;
8527
8528 }
8529
8530 /**
8531  * cik_suspend - suspend the asic
8532  *
8533  * @rdev: radeon_device pointer
8534  *
8535  * Bring the chip into a state suitable for suspend (CIK).
8536  * Called at suspend.
8537  * Returns 0 for success.
8538  */
8539 int cik_suspend(struct radeon_device *rdev)
8540 {
8541         radeon_kfd_suspend(rdev);
8542         radeon_pm_suspend(rdev);
8543         dce6_audio_fini(rdev);
8544         radeon_vm_manager_fini(rdev);
8545         cik_cp_enable(rdev, false);
8546         cik_sdma_enable(rdev, false);
8547         uvd_v1_0_fini(rdev);
8548         radeon_uvd_suspend(rdev);
8549         radeon_vce_suspend(rdev);
8550         cik_fini_pg(rdev);
8551         cik_fini_cg(rdev);
8552         cik_irq_suspend(rdev);
8553         radeon_wb_disable(rdev);
8554         cik_pcie_gart_disable(rdev);
8555         return 0;
8556 }
8557
8558 /* The plan is to move initialization into this function and to use
8559  * helper functions so that radeon_device_init does little more than
8560  * calling the asic specific functions. This should also allow us to
8561  * remove a bunch of callback functions like vram_info.
8563  */
8564 /**
8565  * cik_init - asic specific driver and hw init
8566  *
8567  * @rdev: radeon_device pointer
8568  *
8569  * Setup asic specific driver variables and program the hw
8570  * to a functional state (CIK).
8571  * Called at driver startup.
8572  * Returns 0 for success, errors for failure.
8573  */
8574 int cik_init(struct radeon_device *rdev)
8575 {
8576         struct radeon_ring *ring;
8577         int r;
8578
8579         /* Read BIOS */
8580         if (!radeon_get_bios(rdev)) {
8581                 if (ASIC_IS_AVIVO(rdev))
8582                         return -EINVAL;
8583         }
8584         /* Must be an ATOMBIOS */
8585         if (!rdev->is_atom_bios) {
8586                 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
8587                 return -EINVAL;
8588         }
8589         r = radeon_atombios_init(rdev);
8590         if (r)
8591                 return r;
8592
8593         /* Post card if necessary */
8594         if (!radeon_card_posted(rdev)) {
8595                 if (!rdev->bios) {
8596                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8597                         return -EINVAL;
8598                 }
8599                 DRM_INFO("GPU not posted. posting now...\n");
8600                 atom_asic_init(rdev->mode_info.atom_context);
8601         }
8602         /* init golden registers */
8603         cik_init_golden_registers(rdev);
8604         /* Initialize scratch registers */
8605         cik_scratch_init(rdev);
8606         /* Initialize surface registers */
8607         radeon_surface_init(rdev);
8608         /* Initialize clocks */
8609         radeon_get_clock_info(rdev->ddev);
8610
8611         /* Fence driver */
8612         r = radeon_fence_driver_init(rdev);
8613         if (r)
8614                 return r;
8615
8616         /* initialize memory controller */
8617         r = cik_mc_init(rdev);
8618         if (r)
8619                 return r;
8620         /* Memory manager */
8621         r = radeon_bo_init(rdev);
8622         if (r)
8623                 return r;
8624
8625         if (rdev->flags & RADEON_IS_IGP) {
8626                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8627                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8628                         r = cik_init_microcode(rdev);
8629                         if (r) {
8630                                 DRM_ERROR("Failed to load firmware!\n");
8631                                 return r;
8632                         }
8633                 }
8634         } else {
8635                 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8636                     !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8637                     !rdev->mc_fw) {
8638                         r = cik_init_microcode(rdev);
8639                         if (r) {
8640                                 DRM_ERROR("Failed to load firmware!\n");
8641                                 return r;
8642                         }
8643                 }
8644         }
8645
8646         /* Initialize power management */
8647         radeon_pm_init(rdev);
8648
8649         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8650         ring->ring_obj = NULL;
8651         r600_ring_init(rdev, ring, 1024 * 1024);
8652
8653         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8654         ring->ring_obj = NULL;
8655         r600_ring_init(rdev, ring, 1024 * 1024);
8656         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8657         if (r)
8658                 return r;
8659
8660         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8661         ring->ring_obj = NULL;
8662         r600_ring_init(rdev, ring, 1024 * 1024);
8663         r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8664         if (r)
8665                 return r;
8666
8667         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8668         ring->ring_obj = NULL;
8669         r600_ring_init(rdev, ring, 256 * 1024);
8670
8671         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8672         ring->ring_obj = NULL;
8673         r600_ring_init(rdev, ring, 256 * 1024);
8674
8675         r = radeon_uvd_init(rdev);
8676         if (!r) {
8677                 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8678                 ring->ring_obj = NULL;
8679                 r600_ring_init(rdev, ring, 4096);
8680         }
8681
8682         r = radeon_vce_init(rdev);
8683         if (!r) {
8684                 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8685                 ring->ring_obj = NULL;
8686                 r600_ring_init(rdev, ring, 4096);
8687
8688                 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8689                 ring->ring_obj = NULL;
8690                 r600_ring_init(rdev, ring, 4096);
8691         }
8692
8693         rdev->ih.ring_obj = NULL;
8694         r600_ih_ring_init(rdev, 64 * 1024);
8695
8696         r = r600_pcie_gart_init(rdev);
8697         if (r)
8698                 return r;
8699
8700         rdev->accel_working = true;
8701         r = cik_startup(rdev);
8702         if (r) {
8703                 dev_err(rdev->dev, "disabling GPU acceleration\n");
8704                 cik_cp_fini(rdev);
8705                 cik_sdma_fini(rdev);
8706                 cik_irq_fini(rdev);
8707                 sumo_rlc_fini(rdev);
8708                 cik_mec_fini(rdev);
8709                 radeon_wb_fini(rdev);
8710                 radeon_ib_pool_fini(rdev);
8711                 radeon_vm_manager_fini(rdev);
8712                 radeon_irq_kms_fini(rdev);
8713                 cik_pcie_gart_fini(rdev);
8714                 rdev->accel_working = false;
8715         }
8716
8717         /* Don't start up if the MC ucode is missing.
8718          * The default clocks and voltages before the MC ucode
8719          * is loaded are not sufficient for advanced operations.
8720          */
8721         if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8722                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8723                 return -EINVAL;
8724         }
8725
8726         return 0;
8727 }
8728
8729 /**
8730  * cik_fini - asic specific driver and hw fini
8731  *
8732  * @rdev: radeon_device pointer
8733  *
8734  * Tear down the asic specific driver variables and program the hw
8735  * to an idle state (CIK).
8736  * Called at driver unload.
8737  */
8738 void cik_fini(struct radeon_device *rdev)
8739 {
8740         radeon_pm_fini(rdev);
8741         cik_cp_fini(rdev);
8742         cik_sdma_fini(rdev);
8743         cik_fini_pg(rdev);
8744         cik_fini_cg(rdev);
8745         cik_irq_fini(rdev);
8746         sumo_rlc_fini(rdev);
8747         cik_mec_fini(rdev);
8748         radeon_wb_fini(rdev);
8749         radeon_vm_manager_fini(rdev);
8750         radeon_ib_pool_fini(rdev);
8751         radeon_irq_kms_fini(rdev);
8752         uvd_v1_0_fini(rdev);
8753         radeon_uvd_fini(rdev);
8754         radeon_vce_fini(rdev);
8755         cik_pcie_gart_fini(rdev);
8756         r600_vram_scratch_fini(rdev);
8757         radeon_gem_fini(rdev);
8758         radeon_fence_driver_fini(rdev);
8759         radeon_bo_fini(rdev);
8760         radeon_atombios_fini(rdev);
8761         kfree(rdev->bios);
8762         rdev->bios = NULL;
8763 }
8764
8765 void dce8_program_fmt(struct drm_encoder *encoder)
8766 {
8767         struct drm_device *dev = encoder->dev;
8768         struct radeon_device *rdev = dev->dev_private;
8769         struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8770         struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8771         struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8772         int bpc = 0;
8773         u32 tmp = 0;
8774         enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8775
8776         if (connector) {
8777                 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8778                 bpc = radeon_get_monitor_bpc(connector);
8779                 dither = radeon_connector->dither;
8780         }
8781
8782         /* LVDS/eDP FMT is set up by atom */
8783         if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8784                 return;
8785
8786         /* not needed for analog */
8787         if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8788             (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8789                 return;
8790
8791         if (bpc == 0)
8792                 return;
8793
8794         switch (bpc) {
8795         case 6:
8796                 if (dither == RADEON_FMT_DITHER_ENABLE)
8797                         /* XXX sort out optimal dither settings */
8798                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8799                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8800                 else
8801                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8802                 break;
8803         case 8:
8804                 if (dither == RADEON_FMT_DITHER_ENABLE)
8805                         /* XXX sort out optimal dither settings */
8806                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8807                                 FMT_RGB_RANDOM_ENABLE |
8808                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8809                 else
8810                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8811                 break;
8812         case 10:
8813                 if (dither == RADEON_FMT_DITHER_ENABLE)
8814                         /* XXX sort out optimal dither settings */
8815                         tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8816                                 FMT_RGB_RANDOM_ENABLE |
8817                                 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8818                 else
8819                         tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8820                 break;
8821         default:
8822                 /* not needed */
8823                 break;
8824         }
8825
8826         WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8827 }
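/*
 * Illustrative behaviour of the switch above (panel values assumed, not
 * taken from any real EDID): a connector reporting 6 bpc with dithering
 * enabled gets spatial dithering at depth 0, while the same connector
 * without dithering gets truncation at depth 0; 8 bpc maps to depth 1,
 * 10 bpc to depth 2, and any other depth writes 0 (no dither or
 * truncation).
 */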
8828
8829 /* display watermark setup */
8830 /**
8831  * dce8_line_buffer_adjust - Set up the line buffer
8832  *
8833  * @rdev: radeon_device pointer
8834  * @radeon_crtc: the selected display controller
8835  * @mode: the current display mode on the selected display
8836  * controller
8837  *
8838  * Set up the line buffer allocation for
8839  * the selected display controller (CIK).
8840  * Returns the line buffer size in pixels.
8841  */
8842 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8843                                    struct radeon_crtc *radeon_crtc,
8844                                    struct drm_display_mode *mode)
8845 {
8846         u32 tmp, buffer_alloc, i;
8847         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8848         /*
8849          * Line Buffer Setup
8850          * There are 6 line buffers, one for each display controller.
8851          * There are 3 partitions per LB. Select the number of partitions
8852          * to enable based on the display width.  For display widths larger
8853          * than 4096, you need to use 2 display controllers and combine
8854          * them using the stereo blender.
8855          */
8856         if (radeon_crtc->base.enabled && mode) {
8857                 if (mode->crtc_hdisplay < 1920) {
8858                         tmp = 1;
8859                         buffer_alloc = 2;
8860                 } else if (mode->crtc_hdisplay < 2560) {
8861                         tmp = 2;
8862                         buffer_alloc = 2;
8863                 } else if (mode->crtc_hdisplay < 4096) {
8864                         tmp = 0;
8865                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8866                 } else {
8867                         DRM_DEBUG_KMS("Mode too big for LB!\n");
8868                         tmp = 0;
8869                         buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8870                 }
8871         } else {
8872                 tmp = 1;
8873                 buffer_alloc = 0;
8874         }
8875
8876         WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8877                LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8878
8879         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8880                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8881         for (i = 0; i < rdev->usec_timeout; i++) {
8882                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8883                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
8884                         break;
8885                 udelay(1);
8886         }
8887
8888         if (radeon_crtc->base.enabled && mode) {
8889                 switch (tmp) {
8890                 case 0:
8891                 default:
8892                         return 4096 * 2;
8893                 case 1:
8894                         return 1920 * 2;
8895                 case 2:
8896                         return 2560 * 2;
8897                 }
8898         }
8899
8900         /* controller not enabled, so no lb used */
8901         return 0;
8902 }
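/*
 * Worked example with assumed mode timings (not from a real display): a
 * 1920-wide mode falls in the "< 2560" bucket, so LB_MEMORY_CONFIG is
 * programmed with 2, DMIF_BUFFERS_ALLOCATED with 2, and the function
 * returns 2560 * 2 pixels of line buffer.  A 3840-wide mode falls in the
 * "< 4096" bucket: config 0 is used, 4 DMIF buffers on dGPUs (2 on IGPs),
 * and 4096 * 2 pixels are returned.
 */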
8903
8904 /**
8905  * cik_get_number_of_dram_channels - get the number of dram channels
8906  *
8907  * @rdev: radeon_device pointer
8908  *
8909  * Look up the number of video ram channels (CIK).
8910  * Used for display watermark bandwidth calculations
8911  * Returns the number of dram channels
8912  */
8913 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8914 {
8915         u32 tmp = RREG32(MC_SHARED_CHMAP);
8916
8917         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8918         case 0:
8919         default:
8920                 return 1;
8921         case 1:
8922                 return 2;
8923         case 2:
8924                 return 4;
8925         case 3:
8926                 return 8;
8927         case 4:
8928                 return 3;
8929         case 5:
8930                 return 6;
8931         case 6:
8932                 return 10;
8933         case 7:
8934                 return 12;
8935         case 8:
8936                 return 16;
8937         }
8938 }
8939
8940 struct dce8_wm_params {
8941         u32 dram_channels; /* number of dram channels */
8942         u32 yclk;          /* bandwidth per dram data pin in kHz */
8943         u32 sclk;          /* engine clock in kHz */
8944         u32 disp_clk;      /* display clock in kHz */
8945         u32 src_width;     /* viewport width */
8946         u32 active_time;   /* active display time in ns */
8947         u32 blank_time;    /* blank time in ns */
8948         bool interlaced;    /* mode is interlaced */
8949         fixed20_12 vsc;    /* vertical scale ratio */
8950         u32 num_heads;     /* number of active crtcs */
8951         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8952         u32 lb_size;       /* line buffer allocated to pipe */
8953         u32 vtaps;         /* vertical scaler taps */
8954 };
8955
8956 /**
8957  * dce8_dram_bandwidth - get the dram bandwidth
8958  *
8959  * @wm: watermark calculation data
8960  *
8961  * Calculate the raw dram bandwidth (CIK).
8962  * Used for display watermark bandwidth calculations
8963  * Returns the dram bandwidth in MBytes/s
8964  */
8965 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8966 {
8967         /* Calculate raw DRAM Bandwidth */
8968         fixed20_12 dram_efficiency; /* 0.7 */
8969         fixed20_12 yclk, dram_channels, bandwidth;
8970         fixed20_12 a;
8971
8972         a.full = dfixed_const(1000);
8973         yclk.full = dfixed_const(wm->yclk);
8974         yclk.full = dfixed_div(yclk, a);
8975         dram_channels.full = dfixed_const(wm->dram_channels * 4);
8976         a.full = dfixed_const(10);
8977         dram_efficiency.full = dfixed_const(7);
8978         dram_efficiency.full = dfixed_div(dram_efficiency, a);
8979         bandwidth.full = dfixed_mul(dram_channels, yclk);
8980         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8981
8982         return dfixed_trunc(bandwidth);
8983 }
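/*
 * The fixed-point math above reduces to
 *   bandwidth = (yclk / 1000) * (dram_channels * 4) * 0.7   [MB/s]
 * with the factor of 4 presumably accounting for bytes per channel per
 * clock.  Illustrative numbers (assumed, not from any board): yclk =
 * 1,000,000 kHz and 8 dram channels give 1000 * 32 * 0.7 = 22,400 MB/s
 * of raw dram bandwidth.
 */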
8984
8985 /**
8986  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8987  *
8988  * @wm: watermark calculation data
8989  *
8990  * Calculate the dram bandwidth used for display (CIK).
8991  * Used for display watermark bandwidth calculations
8992  * Returns the dram bandwidth for display in MBytes/s
8993  */
8994 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8995 {
8996         /* Calculate DRAM Bandwidth and the part allocated to display. */
8997         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8998         fixed20_12 yclk, dram_channels, bandwidth;
8999         fixed20_12 a;
9000
9001         a.full = dfixed_const(1000);
9002         yclk.full = dfixed_const(wm->yclk);
9003         yclk.full = dfixed_div(yclk, a);
9004         dram_channels.full = dfixed_const(wm->dram_channels * 4);
9005         a.full = dfixed_const(10);
9006         disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9007         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9008         bandwidth.full = dfixed_mul(dram_channels, yclk);
9009         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9010
9011         return dfixed_trunc(bandwidth);
9012 }
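/*
 * Same structure as dce8_dram_bandwidth() but with the worst-case 0.3
 * display allocation: the illustrative 1,000,000 kHz / 8 channel figures
 * above yield 1000 * 32 * 0.3 = 9,600 MB/s available to the display.
 */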
9013
9014 /**
9015  * dce8_data_return_bandwidth - get the data return bandwidth
9016  *
9017  * @wm: watermark calculation data
9018  *
9019  * Calculate the data return bandwidth used for display (CIK).
9020  * Used for display watermark bandwidth calculations
9021  * Returns the data return bandwidth in MBytes/s
9022  */
9023 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9024 {
9025         /* Calculate the display Data return Bandwidth */
9026         fixed20_12 return_efficiency; /* 0.8 */
9027         fixed20_12 sclk, bandwidth;
9028         fixed20_12 a;
9029
9030         a.full = dfixed_const(1000);
9031         sclk.full = dfixed_const(wm->sclk);
9032         sclk.full = dfixed_div(sclk, a);
9033         a.full = dfixed_const(10);
9034         return_efficiency.full = dfixed_const(8);
9035         return_efficiency.full = dfixed_div(return_efficiency, a);
9036         a.full = dfixed_const(32);
9037         bandwidth.full = dfixed_mul(a, sclk);
9038         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9039
9040         return dfixed_trunc(bandwidth);
9041 }
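/*
 * Equivalent to (sclk / 1000) * 32 * 0.8 MB/s.  With an assumed engine
 * clock of 800,000 kHz this comes to 800 * 32 * 0.8 = 20,480 MB/s.
 */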
9042
9043 /**
9044  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9045  *
9046  * @wm: watermark calculation data
9047  *
9048  * Calculate the dmif bandwidth used for display (CIK).
9049  * Used for display watermark bandwidth calculations
9050  * Returns the dmif bandwidth in MBytes/s
9051  */
9052 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9053 {
9054         /* Calculate the DMIF Request Bandwidth */
9055         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9056         fixed20_12 disp_clk, bandwidth;
9057         fixed20_12 a, b;
9058
9059         a.full = dfixed_const(1000);
9060         disp_clk.full = dfixed_const(wm->disp_clk);
9061         disp_clk.full = dfixed_div(disp_clk, a);
9062         a.full = dfixed_const(32);
9063         b.full = dfixed_mul(a, disp_clk);
9064
9065         a.full = dfixed_const(10);
9066         disp_clk_request_efficiency.full = dfixed_const(8);
9067         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9068
9069         bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9070
9071         return dfixed_trunc(bandwidth);
9072 }
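/*
 * Equivalent to (disp_clk / 1000) * 32 * 0.8 MB/s.  With an assumed
 * display clock of 300,000 kHz this comes to 300 * 32 * 0.8 = 7,680 MB/s.
 */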
9073
9074 /**
9075  * dce8_available_bandwidth - get the min available bandwidth
9076  *
9077  * @wm: watermark calculation data
9078  *
9079  * Calculate the min available bandwidth used for display (CIK).
9080  * Used for display watermark bandwidth calculations
9081  * Returns the min available bandwidth in MBytes/s
9082  */
9083 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9084 {
9085         /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9086         u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9087         u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9088         u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9089
9090         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9091 }
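/*
 * With the illustrative figures used in the helpers above (22,400 MB/s
 * dram, 20,480 MB/s data return, 7,680 MB/s dmif) the dmif request
 * bandwidth is the limiting term, so this would return 7,680 MB/s.
 */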
9092
9093 /**
9094  * dce8_average_bandwidth - get the average available bandwidth
9095  *
9096  * @wm: watermark calculation data
9097  *
9098  * Calculate the average available bandwidth used for display (CIK).
9099  * Used for display watermark bandwidth calculations
9100  * Returns the average available bandwidth in MBytes/s
9101  */
9102 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9103 {
9104         /* Calculate the display mode Average Bandwidth
9105          * DisplayMode should contain the source and destination dimensions,
9106          * timing, etc.
9107          */
9108         fixed20_12 bpp;
9109         fixed20_12 line_time;
9110         fixed20_12 src_width;
9111         fixed20_12 bandwidth;
9112         fixed20_12 a;
9113
9114         a.full = dfixed_const(1000);
9115         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9116         line_time.full = dfixed_div(line_time, a);
9117         bpp.full = dfixed_const(wm->bytes_per_pixel);
9118         src_width.full = dfixed_const(wm->src_width);
9119         bandwidth.full = dfixed_mul(src_width, bpp);
9120         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9121         bandwidth.full = dfixed_div(bandwidth, line_time);
9122
9123         return dfixed_trunc(bandwidth);
9124 }
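/*
 * In MB/s this is src_width * bytes_per_pixel * vsc divided by the line
 * time in microseconds.  Assumed example: 1920 pixels wide, 4 bytes per
 * pixel, vsc = 1 and a 13,200 ns line time give 1920 * 4 / 13.2, roughly
 * 581 MB/s of average demand for that head.
 */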
9125
9126 /**
9127  * dce8_latency_watermark - get the latency watermark
9128  *
9129  * @wm: watermark calculation data
9130  *
9131  * Calculate the latency watermark (CIK).
9132  * Used for display watermark bandwidth calculations
9133  * Returns the latency watermark in ns
9134  */
9135 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9136 {
9137         /* First calculate the latency in ns */
9138         u32 mc_latency = 2000; /* 2000 ns. */
9139         u32 available_bandwidth = dce8_available_bandwidth(wm);
9140         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9141         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9142         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9143         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9144                 (wm->num_heads * cursor_line_pair_return_time);
9145         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9146         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9147         u32 tmp, dmif_size = 12288;
9148         fixed20_12 a, b, c;
9149
9150         if (wm->num_heads == 0)
9151                 return 0;
9152
9153         a.full = dfixed_const(2);
9154         b.full = dfixed_const(1);
9155         if ((wm->vsc.full > a.full) ||
9156             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9157             (wm->vtaps >= 5) ||
9158             ((wm->vsc.full >= a.full) && wm->interlaced))
9159                 max_src_lines_per_dst_line = 4;
9160         else
9161                 max_src_lines_per_dst_line = 2;
9162
9163         a.full = dfixed_const(available_bandwidth);
9164         b.full = dfixed_const(wm->num_heads);
9165         a.full = dfixed_div(a, b);
9166
9167         b.full = dfixed_const(mc_latency + 512);
9168         c.full = dfixed_const(wm->disp_clk);
9169         b.full = dfixed_div(b, c);
9170
9171         c.full = dfixed_const(dmif_size);
9172         b.full = dfixed_div(c, b);
9173
9174         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9175
9176         b.full = dfixed_const(1000);
9177         c.full = dfixed_const(wm->disp_clk);
9178         b.full = dfixed_div(c, b);
9179         c.full = dfixed_const(wm->bytes_per_pixel);
9180         b.full = dfixed_mul(b, c);
9181
9182         lb_fill_bw = min(tmp, dfixed_trunc(b));
9183
9184         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9185         b.full = dfixed_const(1000);
9186         c.full = dfixed_const(lb_fill_bw);
9187         b.full = dfixed_div(c, b);
9188         a.full = dfixed_div(a, b);
9189         line_fill_time = dfixed_trunc(a);
9190
9191         if (line_fill_time < wm->active_time)
9192                 return latency;
9193         else
9194                 return latency + (line_fill_time - wm->active_time);
9195
9196 }
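/*
 * Rough shape of the result: latency = mc_latency (2000 ns) +
 * other_heads_data_return_time + dc_latency, extended by any line buffer
 * fill time that exceeds the active display time.  Continuing the assumed
 * single-head example (7,680 MB/s available bandwidth, 300,000 kHz
 * display clock): worst_chunk_return_time = 512 * 8 * 1000 / 7680 = 533 ns,
 * cursor_line_pair_return_time = 66 ns, dc_latency = 133 ns, so the
 * watermark is 2000 + (2 * 533 + 66) + 133 = 3265 ns when the line
 * buffer refills within the active time.
 */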
9197
9198 /**
9199  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9200  * average and available dram bandwidth
9201  *
9202  * @wm: watermark calculation data
9203  *
9204  * Check if the display average bandwidth fits in the display
9205  * dram bandwidth (CIK).
9206  * Used for display watermark bandwidth calculations
9207  * Returns true if the display fits, false if not.
9208  */
9209 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9210 {
9211         if (dce8_average_bandwidth(wm) <=
9212             (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9213                 return true;
9214         else
9215                 return false;
9216 }
9217
9218 /**
9219  * dce8_average_bandwidth_vs_available_bandwidth - check
9220  * average and available bandwidth
9221  *
9222  * @wm: watermark calculation data
9223  *
9224  * Check if the display average bandwidth fits in the display
9225  * available bandwidth (CIK).
9226  * Used for display watermark bandwidth calculations
9227  * Returns true if the display fits, false if not.
9228  */
9229 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9230 {
9231         if (dce8_average_bandwidth(wm) <=
9232             (dce8_available_bandwidth(wm) / wm->num_heads))
9233                 return true;
9234         else
9235                 return false;
9236 }
9237
9238 /**
9239  * dce8_check_latency_hiding - check latency hiding
9240  *
9241  * @wm: watermark calculation data
9242  *
9243  * Check latency hiding (CIK).
9244  * Used for display watermark bandwidth calculations
9245  * Returns true if the display fits, false if not.
9246  */
9247 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9248 {
9249         u32 lb_partitions = wm->lb_size / wm->src_width;
9250         u32 line_time = wm->active_time + wm->blank_time;
9251         u32 latency_tolerant_lines;
9252         u32 latency_hiding;
9253         fixed20_12 a;
9254
9255         a.full = dfixed_const(1);
9256         if (wm->vsc.full > a.full)
9257                 latency_tolerant_lines = 1;
9258         else {
9259                 if (lb_partitions <= (wm->vtaps + 1))
9260                         latency_tolerant_lines = 1;
9261                 else
9262                         latency_tolerant_lines = 2;
9263         }
9264
9265         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9266
9267         if (dce8_latency_watermark(wm) <= latency_hiding)
9268                 return true;
9269         else
9270                 return false;
9271 }
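/*
 * Continuing the assumed 1920-wide example: lb_size = 2560 * 2 gives
 * lb_partitions = 5120 / 1920 = 2, which with vtaps = 1 allows only one
 * latency tolerant line, so latency_hiding = 13,200 + 1,680 = 14,880 ns.
 * That comfortably covers the 3,265 ns watermark estimated above, so the
 * display would be reported as fitting.
 */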
9272
9273 /**
9274  * dce8_program_watermarks - program display watermarks
9275  *
9276  * @rdev: radeon_device pointer
9277  * @radeon_crtc: the selected display controller
9278  * @lb_size: line buffer size
9279  * @num_heads: number of display controllers in use
9280  *
9281  * Calculate and program the display watermarks for the
9282  * selected display controller (CIK).
9283  */
9284 static void dce8_program_watermarks(struct radeon_device *rdev,
9285                                     struct radeon_crtc *radeon_crtc,
9286                                     u32 lb_size, u32 num_heads)
9287 {
9288         struct drm_display_mode *mode = &radeon_crtc->base.mode;
9289         struct dce8_wm_params wm_low, wm_high;
9290         u32 pixel_period;
9291         u32 line_time = 0;
9292         u32 latency_watermark_a = 0, latency_watermark_b = 0;
9293         u32 tmp, wm_mask;
9294
9295         if (radeon_crtc->base.enabled && num_heads && mode) {
9296                 pixel_period = 1000000 / (u32)mode->clock;
9297                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9298
9299                 /* watermark for high clocks */
9300                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9301                     rdev->pm.dpm_enabled) {
9302                         wm_high.yclk =
9303                                 radeon_dpm_get_mclk(rdev, false) * 10;
9304                         wm_high.sclk =
9305                                 radeon_dpm_get_sclk(rdev, false) * 10;
9306                 } else {
9307                         wm_high.yclk = rdev->pm.current_mclk * 10;
9308                         wm_high.sclk = rdev->pm.current_sclk * 10;
9309                 }
9310
9311                 wm_high.disp_clk = mode->clock;
9312                 wm_high.src_width = mode->crtc_hdisplay;
9313                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9314                 wm_high.blank_time = line_time - wm_high.active_time;
9315                 wm_high.interlaced = false;
9316                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9317                         wm_high.interlaced = true;
9318                 wm_high.vsc = radeon_crtc->vsc;
9319                 wm_high.vtaps = 1;
9320                 if (radeon_crtc->rmx_type != RMX_OFF)
9321                         wm_high.vtaps = 2;
9322                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9323                 wm_high.lb_size = lb_size;
9324                 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9325                 wm_high.num_heads = num_heads;
9326
9327                 /* set for high clocks */
9328                 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9329
9330                 /* possibly force display priority to high */
9331                 /* should really do this at mode validation time... */
9332                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9333                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9334                     !dce8_check_latency_hiding(&wm_high) ||
9335                     (rdev->disp_priority == 2)) {
9336                         DRM_DEBUG_KMS("force priority to high\n");
9337                 }
9338
9339                 /* watermark for low clocks */
9340                 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9341                     rdev->pm.dpm_enabled) {
9342                         wm_low.yclk =
9343                                 radeon_dpm_get_mclk(rdev, true) * 10;
9344                         wm_low.sclk =
9345                                 radeon_dpm_get_sclk(rdev, true) * 10;
9346                 } else {
9347                         wm_low.yclk = rdev->pm.current_mclk * 10;
9348                         wm_low.sclk = rdev->pm.current_sclk * 10;
9349                 }
9350
9351                 wm_low.disp_clk = mode->clock;
9352                 wm_low.src_width = mode->crtc_hdisplay;
9353                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9354                 wm_low.blank_time = line_time - wm_low.active_time;
9355                 wm_low.interlaced = false;
9356                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9357                         wm_low.interlaced = true;
9358                 wm_low.vsc = radeon_crtc->vsc;
9359                 wm_low.vtaps = 1;
9360                 if (radeon_crtc->rmx_type != RMX_OFF)
9361                         wm_low.vtaps = 2;
9362                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9363                 wm_low.lb_size = lb_size;
9364                 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9365                 wm_low.num_heads = num_heads;
9366
9367                 /* set for low clocks */
9368                 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9369
9370                 /* possibly force display priority to high */
9371                 /* should really do this at mode validation time... */
9372                 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9373                     !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9374                     !dce8_check_latency_hiding(&wm_low) ||
9375                     (rdev->disp_priority == 2)) {
9376                         DRM_DEBUG_KMS("force priority to high\n");
9377                 }
9378         }
9379
9380         /* select wm A */
9381         wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9382         tmp = wm_mask;
9383         tmp &= ~LATENCY_WATERMARK_MASK(3);
9384         tmp |= LATENCY_WATERMARK_MASK(1);
9385         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9386         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9387                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9388                 LATENCY_HIGH_WATERMARK(line_time)));
9389         /* select wm B */
9390         tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9391         tmp &= ~LATENCY_WATERMARK_MASK(3);
9392         tmp |= LATENCY_WATERMARK_MASK(2);
9393         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9394         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9395                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9396                 LATENCY_HIGH_WATERMARK(line_time)));
9397         /* restore original selection */
9398         WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9399
9400         /* save values for DPM */
9401         radeon_crtc->line_time = line_time;
9402         radeon_crtc->wm_high = latency_watermark_a;
9403         radeon_crtc->wm_low = latency_watermark_b;
9404 }
9405
9406 /**
9407  * dce8_bandwidth_update - program display watermarks
9408  *
9409  * @rdev: radeon_device pointer
9410  *
9411  * Calculate and program the display watermarks and line
9412  * buffer allocation (CIK).
9413  */
9414 void dce8_bandwidth_update(struct radeon_device *rdev)
9415 {
9416         struct drm_display_mode *mode = NULL;
9417         u32 num_heads = 0, lb_size;
9418         int i;
9419
9420         if (!rdev->mode_info.mode_config_initialized)
9421                 return;
9422
9423         radeon_update_display_priority(rdev);
9424
9425         for (i = 0; i < rdev->num_crtc; i++) {
9426                 if (rdev->mode_info.crtcs[i]->base.enabled)
9427                         num_heads++;
9428         }
9429         for (i = 0; i < rdev->num_crtc; i++) {
9430                 mode = &rdev->mode_info.crtcs[i]->base.mode;
9431                 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9432                 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9433         }
9434 }
9435
9436 /**
9437  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9438  *
9439  * @rdev: radeon_device pointer
9440  *
9441  * Fetches a GPU clock counter snapshot (CIK).
9442  * Returns the 64 bit clock counter snapshot.
9443  */
9444 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9445 {
9446         uint64_t clock;
9447
9448         mutex_lock(&rdev->gpu_clock_mutex);
9449         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9450         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9451                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9452         mutex_unlock(&rdev->gpu_clock_mutex);
9453         return clock;
9454 }
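/*
 * Illustrative usage (not a prescribed API pattern): take one snapshot
 * before and one after a workload and subtract them; the difference is in
 * GPU clock ticks and must be scaled by the current GPU clock frequency
 * to convert to wall time.
 */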
9455
9456 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9457                               u32 cntl_reg, u32 status_reg)
9458 {
9459         int r, i;
9460         struct atom_clock_dividers dividers;
9461         uint32_t tmp;
9462
9463         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9464                                            clock, false, &dividers);
9465         if (r)
9466                 return r;
9467
9468         tmp = RREG32_SMC(cntl_reg);
9469         tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9470         tmp |= dividers.post_divider;
9471         WREG32_SMC(cntl_reg, tmp);
9472
9473         for (i = 0; i < 100; i++) {
9474                 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9475                         break;
9476                 mdelay(10);
9477         }
9478         if (i == 100)
9479                 return -ETIMEDOUT;
9480
9481         return 0;
9482 }
9483
9484 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9485 {
9486         int r = 0;
9487
9488         r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9489         if (r)
9490                 return r;
9491
9492         r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9493         return r;
9494 }
9495
9496 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9497 {
9498         int r, i;
9499         struct atom_clock_dividers dividers;
9500         u32 tmp;
9501
9502         r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9503                                            ecclk, false, &dividers);
9504         if (r)
9505                 return r;
9506
9507         for (i = 0; i < 100; i++) {
9508                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9509                         break;
9510                 mdelay(10);
9511         }
9512         if (i == 100)
9513                 return -ETIMEDOUT;
9514
9515         tmp = RREG32_SMC(CG_ECLK_CNTL);
9516         tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9517         tmp |= dividers.post_divider;
9518         WREG32_SMC(CG_ECLK_CNTL, tmp);
9519
9520         for (i = 0; i < 100; i++) {
9521                 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9522                         break;
9523                 mdelay(10);
9524         }
9525         if (i == 100)
9526                 return -ETIMEDOUT;
9527
9528         return 0;
9529 }
9530
9531 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9532 {
9533         struct pci_dev *root = rdev->pdev->bus->self;
9534         int bridge_pos, gpu_pos;
9535         u32 speed_cntl, mask, current_data_rate;
9536         int ret, i;
9537         u16 tmp16;
9538
9539         if (pci_is_root_bus(rdev->pdev->bus))
9540                 return;
9541
9542         if (radeon_pcie_gen2 == 0)
9543                 return;
9544
9545         if (rdev->flags & RADEON_IS_IGP)
9546                 return;
9547
9548         if (!(rdev->flags & RADEON_IS_PCIE))
9549                 return;
9550
9551         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9552         if (ret != 0)
9553                 return;
9554
9555         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9556                 return;
9557
9558         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9559         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9560                 LC_CURRENT_DATA_RATE_SHIFT;
9561         if (mask & DRM_PCIE_SPEED_80) {
9562                 if (current_data_rate == 2) {
9563                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9564                         return;
9565                 }
9566                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9567         } else if (mask & DRM_PCIE_SPEED_50) {
9568                 if (current_data_rate == 1) {
9569                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9570                         return;
9571                 }
9572                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9573         }
9574
9575         bridge_pos = pci_pcie_cap(root);
9576         if (!bridge_pos)
9577                 return;
9578
9579         gpu_pos = pci_pcie_cap(rdev->pdev);
9580         if (!gpu_pos)
9581                 return;
9582
9583         if (mask & DRM_PCIE_SPEED_80) {
9584                 /* re-try equalization if gen3 is not already enabled */
9585                 if (current_data_rate != 2) {
9586                         u16 bridge_cfg, gpu_cfg;
9587                         u16 bridge_cfg2, gpu_cfg2;
9588                         u32 max_lw, current_lw, tmp;
9589
9590                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9591                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9592
9593                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9594                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9595
9596                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9597                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9598
9599                         tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9600                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9601                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9602
9603                         if (current_lw < max_lw) {
9604                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9605                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9606                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9607                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9608                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9609                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9610                                 }
9611                         }
9612
9613                         for (i = 0; i < 10; i++) {
9614                                 /* check status */
9615                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9616                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9617                                         break;
9618
9619                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9620                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9621
9622                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9623                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9624
9625                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9626                                 tmp |= LC_SET_QUIESCE;
9627                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9628
9629                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9630                                 tmp |= LC_REDO_EQ;
9631                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9632
9633                                 mdelay(100);
9634
9635                                 /* linkctl */
9636                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9637                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9638                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9639                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9640
9641                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9642                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9643                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9644                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9645
9646                                 /* linkctl2 */
9647                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9648                                 tmp16 &= ~((1 << 4) | (7 << 9));
9649                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9650                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9651
9652                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9653                                 tmp16 &= ~((1 << 4) | (7 << 9));
9654                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9655                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9656
9657                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9658                                 tmp &= ~LC_SET_QUIESCE;
9659                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9660                         }
9661                 }
9662         }
9663
9664         /* set the link speed */
9665         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9666         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9667         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9668
9669         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9670         tmp16 &= ~0xf;
9671         if (mask & DRM_PCIE_SPEED_80)
9672                 tmp16 |= 3; /* gen3 */
9673         else if (mask & DRM_PCIE_SPEED_50)
9674                 tmp16 |= 2; /* gen2 */
9675         else
9676                 tmp16 |= 1; /* gen1 */
9677         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9678
9679         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9680         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9681         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9682
9683         for (i = 0; i < rdev->usec_timeout; i++) {
9684                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9685                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9686                         break;
9687                 udelay(1);
9688         }
9689 }
9690
9691 static void cik_program_aspm(struct radeon_device *rdev)
9692 {
9693         u32 data, orig;
9694         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9695         bool disable_clkreq = false;
9696
9697         if (radeon_aspm == 0)
9698                 return;
9699
9700         /* XXX double check IGPs */
9701         if (rdev->flags & RADEON_IS_IGP)
9702                 return;
9703
9704         if (!(rdev->flags & RADEON_IS_PCIE))
9705                 return;
9706
9707         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9708         data &= ~LC_XMIT_N_FTS_MASK;
9709         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9710         if (orig != data)
9711                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9712
9713         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9714         data |= LC_GO_TO_RECOVERY;
9715         if (orig != data)
9716                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9717
9718         orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9719         data |= P_IGNORE_EDB_ERR;
9720         if (orig != data)
9721                 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9722
9723         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9724         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9725         data |= LC_PMI_TO_L1_DIS;
9726         if (!disable_l0s)
9727                 data |= LC_L0S_INACTIVITY(7);
9728
9729         if (!disable_l1) {
9730                 data |= LC_L1_INACTIVITY(7);
9731                 data &= ~LC_PMI_TO_L1_DIS;
9732                 if (orig != data)
9733                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9734
9735                 if (!disable_plloff_in_l1) {
9736                         bool clk_req_support;
9737
9738                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9739                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9740                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9741                         if (orig != data)
9742                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9743
9744                         orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9745                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9746                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9747                         if (orig != data)
9748                                 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9749
9750                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9751                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9752                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9753                         if (orig != data)
9754                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9755
9756                         orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9757                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9758                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9759                         if (orig != data)
9760                                 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9761
9762                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9763                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9764                         data |= LC_DYN_LANES_PWR_STATE(3);
9765                         if (orig != data)
9766                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9767
9768                         if (!disable_clkreq &&
9769                             !pci_is_root_bus(rdev->pdev->bus)) {
9770                                 struct pci_dev *root = rdev->pdev->bus->self;
9771                                 u32 lnkcap;
9772
9773                                 clk_req_support = false;
9774                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9775                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9776                                         clk_req_support = true;
9777                         } else {
9778                                 clk_req_support = false;
9779                         }
9780
9781                         if (clk_req_support) {
9782                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9783                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9784                                 if (orig != data)
9785                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9786
9787                                 orig = data = RREG32_SMC(THM_CLK_CNTL);
9788                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9789                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9790                                 if (orig != data)
9791                                         WREG32_SMC(THM_CLK_CNTL, data);
9792
9793                                 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9794                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9795                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9796                                 if (orig != data)
9797                                         WREG32_SMC(MISC_CLK_CTRL, data);
9798
9799                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9800                                 data &= ~BCLK_AS_XCLK;
9801                                 if (orig != data)
9802                                         WREG32_SMC(CG_CLKPIN_CNTL, data);
9803
9804                                 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9805                                 data &= ~FORCE_BIF_REFCLK_EN;
9806                                 if (orig != data)
9807                                         WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9808
9809                                 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9810                                 data &= ~MPLL_CLKOUT_SEL_MASK;
9811                                 data |= MPLL_CLKOUT_SEL(4);
9812                                 if (orig != data)
9813                                         WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9814                         }
9815                 }
9816         } else {
9817                 if (orig != data)
9818                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9819         }
9820
9821         orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9822         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9823         if (orig != data)
9824                 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9825
9826         if (!disable_l0s) {
9827                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9828         if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9829                         data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9830                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9831                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9832                                 data &= ~LC_L0S_INACTIVITY_MASK;
9833                                 if (orig != data)
9834                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9835                         }
9836                 }
9837         }
9838 }