Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / drivers / gpu / drm / radeon / si.c
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include <drm/radeon_drm.h>
32 #include "sid.h"
33 #include "atom.h"
34 #include "si_blit_shaders.h"
35 #include "clearstate_si.h"
36 #include "radeon_ucode.h"
37
38
/*
 * Firmware image names recorded with MODULE_FIRMWARE(), one group per chip
 * name (TAHITI, PITCAIRN, VERDE, OLAND, HAINAN).  Every chip lists an
 * upper-case set (pfp, me, ce, mc, mc2, rlc, smc) followed by a lower-case
 * set (pfp, me, ce, mc, rlc, smc, k_smc).
 * NOTE(review): the two naming schemes presumably correspond to two
 * generations of firmware packaging -- confirm against the firmware loader,
 * which is not visible in this part of the file.
 */
39 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
44 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
45 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
46
47 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
48 MODULE_FIRMWARE("radeon/tahiti_me.bin");
49 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
50 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
51 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
52 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
53 MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
54
55 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
56 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
57 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
58 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
59 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
60 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
61 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
62
63 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
64 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
65 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
66 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
67 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
68 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
69 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
70
71 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
72 MODULE_FIRMWARE("radeon/VERDE_me.bin");
73 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
74 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
75 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
76 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
77 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
78
79 MODULE_FIRMWARE("radeon/verde_pfp.bin");
80 MODULE_FIRMWARE("radeon/verde_me.bin");
81 MODULE_FIRMWARE("radeon/verde_ce.bin");
82 MODULE_FIRMWARE("radeon/verde_mc.bin");
83 MODULE_FIRMWARE("radeon/verde_rlc.bin");
84 MODULE_FIRMWARE("radeon/verde_smc.bin");
85 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
86
87 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
88 MODULE_FIRMWARE("radeon/OLAND_me.bin");
89 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
90 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
91 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
92 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
93 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
94
95 MODULE_FIRMWARE("radeon/oland_pfp.bin");
96 MODULE_FIRMWARE("radeon/oland_me.bin");
97 MODULE_FIRMWARE("radeon/oland_ce.bin");
98 MODULE_FIRMWARE("radeon/oland_mc.bin");
99 MODULE_FIRMWARE("radeon/oland_rlc.bin");
100 MODULE_FIRMWARE("radeon/oland_smc.bin");
101 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
102
103 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
104 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
105 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
106 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
107 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
108 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
109 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
110
111 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
112 MODULE_FIRMWARE("radeon/hainan_me.bin");
113 MODULE_FIRMWARE("radeon/hainan_ce.bin");
114 MODULE_FIRMWARE("radeon/hainan_mc.bin");
115 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
116 MODULE_FIRMWARE("radeon/hainan_smc.bin");
117 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
118
/*
 * Forward declarations.  The static prototypes are for helpers with internal
 * linkage, so their definitions belong to this file.  The extern prototypes
 * (sumo_*, r600_*, evergreen_*) have no definition in this part of the file;
 * presumably they are implemented in the sources for those ASIC families --
 * TODO confirm.
 */
119 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
120 static void si_pcie_gen3_enable(struct radeon_device *rdev);
121 static void si_program_aspm(struct radeon_device *rdev);
122 extern void sumo_rlc_fini(struct radeon_device *rdev);
123 extern int sumo_rlc_init(struct radeon_device *rdev);
124 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
125 extern void r600_ih_ring_fini(struct radeon_device *rdev);
126 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
127 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
128 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
129 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
130 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
131 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
133                                          bool enable);
134 static void si_init_pg(struct radeon_device *rdev);
135 static void si_init_cg(struct radeon_device *rdev);
136 static void si_fini_pg(struct radeon_device *rdev);
137 static void si_fini_cg(struct radeon_device *rdev);
138 static void si_rlc_stop(struct radeon_device *rdev);
139
/*
 * Verde RLC save/restore register list (per the array name).
 *
 * Layout as written: almost every entry is a pair of words -- a key of the
 * form (select << 16) | (byte_offset >> 2) followed by a zero word.  The
 * select values that occur are 0x8000, 0x8001, 0x8040, 0x8041 and 0x9c00.
 * A single bare 0x3 word follows the 0xac54 pair, and the table ends with
 * one lone zero word.  The zero after the 0xac54 key is spelled 0X00000000;
 * it is the same value as the other zero words.
 *
 * NOTE(review): the meaning of the upper 16 "select" bits, of the zero word
 * in each pair (presumably a slot for the saved value) and of the 0x3 marker
 * is not visible in this part of the file -- confirm against the consumer of
 * this table before editing any entry.
 */
140 static const u32 verde_rlc_save_restore_register_list[] =
141 {
142         (0x8000 << 16) | (0x98f4 >> 2),
143         0x00000000,
144         (0x8040 << 16) | (0x98f4 >> 2),
145         0x00000000,
146         (0x8000 << 16) | (0xe80 >> 2),
147         0x00000000,
148         (0x8040 << 16) | (0xe80 >> 2),
149         0x00000000,
150         (0x8000 << 16) | (0x89bc >> 2),
151         0x00000000,
152         (0x8040 << 16) | (0x89bc >> 2),
153         0x00000000,
154         (0x8000 << 16) | (0x8c1c >> 2),
155         0x00000000,
156         (0x8040 << 16) | (0x8c1c >> 2),
157         0x00000000,
158         (0x9c00 << 16) | (0x98f0 >> 2),
159         0x00000000,
160         (0x9c00 << 16) | (0xe7c >> 2),
161         0x00000000,
162         (0x8000 << 16) | (0x9148 >> 2),
163         0x00000000,
164         (0x8040 << 16) | (0x9148 >> 2),
165         0x00000000,
166         (0x9c00 << 16) | (0x9150 >> 2),
167         0x00000000,
168         (0x9c00 << 16) | (0x897c >> 2),
169         0x00000000,
170         (0x9c00 << 16) | (0x8d8c >> 2),
171         0x00000000,
172         (0x9c00 << 16) | (0xac54 >> 2),
173         0X00000000,
174         0x3,
175         (0x9c00 << 16) | (0x98f8 >> 2),
176         0x00000000,
177         (0x9c00 << 16) | (0x9910 >> 2),
178         0x00000000,
179         (0x9c00 << 16) | (0x9914 >> 2),
180         0x00000000,
181         (0x9c00 << 16) | (0x9918 >> 2),
182         0x00000000,
183         (0x9c00 << 16) | (0x991c >> 2),
184         0x00000000,
185         (0x9c00 << 16) | (0x9920 >> 2),
186         0x00000000,
187         (0x9c00 << 16) | (0x9924 >> 2),
188         0x00000000,
189         (0x9c00 << 16) | (0x9928 >> 2),
190         0x00000000,
191         (0x9c00 << 16) | (0x992c >> 2),
192         0x00000000,
193         (0x9c00 << 16) | (0x9930 >> 2),
194         0x00000000,
195         (0x9c00 << 16) | (0x9934 >> 2),
196         0x00000000,
197         (0x9c00 << 16) | (0x9938 >> 2),
198         0x00000000,
199         (0x9c00 << 16) | (0x993c >> 2),
200         0x00000000,
201         (0x9c00 << 16) | (0x9940 >> 2),
202         0x00000000,
203         (0x9c00 << 16) | (0x9944 >> 2),
204         0x00000000,
205         (0x9c00 << 16) | (0x9948 >> 2),
206         0x00000000,
207         (0x9c00 << 16) | (0x994c >> 2),
208         0x00000000,
209         (0x9c00 << 16) | (0x9950 >> 2),
210         0x00000000,
211         (0x9c00 << 16) | (0x9954 >> 2),
212         0x00000000,
213         (0x9c00 << 16) | (0x9958 >> 2),
214         0x00000000,
215         (0x9c00 << 16) | (0x995c >> 2),
216         0x00000000,
217         (0x9c00 << 16) | (0x9960 >> 2),
218         0x00000000,
219         (0x9c00 << 16) | (0x9964 >> 2),
220         0x00000000,
221         (0x9c00 << 16) | (0x9968 >> 2),
222         0x00000000,
223         (0x9c00 << 16) | (0x996c >> 2),
224         0x00000000,
225         (0x9c00 << 16) | (0x9970 >> 2),
226         0x00000000,
227         (0x9c00 << 16) | (0x9974 >> 2),
228         0x00000000,
229         (0x9c00 << 16) | (0x9978 >> 2),
230         0x00000000,
231         (0x9c00 << 16) | (0x997c >> 2),
232         0x00000000,
233         (0x9c00 << 16) | (0x9980 >> 2),
234         0x00000000,
235         (0x9c00 << 16) | (0x9984 >> 2),
236         0x00000000,
237         (0x9c00 << 16) | (0x9988 >> 2),
238         0x00000000,
239         (0x9c00 << 16) | (0x998c >> 2),
240         0x00000000,
241         (0x9c00 << 16) | (0x8c00 >> 2),
242         0x00000000,
243         (0x9c00 << 16) | (0x8c14 >> 2),
244         0x00000000,
245         (0x9c00 << 16) | (0x8c04 >> 2),
246         0x00000000,
247         (0x9c00 << 16) | (0x8c08 >> 2),
248         0x00000000,
249         (0x8000 << 16) | (0x9b7c >> 2),
250         0x00000000,
251         (0x8040 << 16) | (0x9b7c >> 2),
252         0x00000000,
253         (0x8000 << 16) | (0xe84 >> 2),
254         0x00000000,
255         (0x8040 << 16) | (0xe84 >> 2),
256         0x00000000,
257         (0x8000 << 16) | (0x89c0 >> 2),
258         0x00000000,
259         (0x8040 << 16) | (0x89c0 >> 2),
260         0x00000000,
261         (0x8000 << 16) | (0x914c >> 2),
262         0x00000000,
263         (0x8040 << 16) | (0x914c >> 2),
264         0x00000000,
265         (0x8000 << 16) | (0x8c20 >> 2),
266         0x00000000,
267         (0x8040 << 16) | (0x8c20 >> 2),
268         0x00000000,
269         (0x8000 << 16) | (0x9354 >> 2),
270         0x00000000,
271         (0x8040 << 16) | (0x9354 >> 2),
272         0x00000000,
273         (0x9c00 << 16) | (0x9060 >> 2),
274         0x00000000,
275         (0x9c00 << 16) | (0x9364 >> 2),
276         0x00000000,
277         (0x9c00 << 16) | (0x9100 >> 2),
278         0x00000000,
279         (0x9c00 << 16) | (0x913c >> 2),
280         0x00000000,
281         (0x8000 << 16) | (0x90e0 >> 2),
282         0x00000000,
283         (0x8000 << 16) | (0x90e4 >> 2),
284         0x00000000,
285         (0x8000 << 16) | (0x90e8 >> 2),
286         0x00000000,
287         (0x8040 << 16) | (0x90e0 >> 2),
288         0x00000000,
289         (0x8040 << 16) | (0x90e4 >> 2),
290         0x00000000,
291         (0x8040 << 16) | (0x90e8 >> 2),
292         0x00000000,
293         (0x9c00 << 16) | (0x8bcc >> 2),
294         0x00000000,
295         (0x9c00 << 16) | (0x8b24 >> 2),
296         0x00000000,
297         (0x9c00 << 16) | (0x88c4 >> 2),
298         0x00000000,
299         (0x9c00 << 16) | (0x8e50 >> 2),
300         0x00000000,
301         (0x9c00 << 16) | (0x8c0c >> 2),
302         0x00000000,
303         (0x9c00 << 16) | (0x8e58 >> 2),
304         0x00000000,
305         (0x9c00 << 16) | (0x8e5c >> 2),
306         0x00000000,
307         (0x9c00 << 16) | (0x9508 >> 2),
308         0x00000000,
309         (0x9c00 << 16) | (0x950c >> 2),
310         0x00000000,
311         (0x9c00 << 16) | (0x9494 >> 2),
312         0x00000000,
313         (0x9c00 << 16) | (0xac0c >> 2),
314         0x00000000,
315         (0x9c00 << 16) | (0xac10 >> 2),
316         0x00000000,
317         (0x9c00 << 16) | (0xac14 >> 2),
318         0x00000000,
319         (0x9c00 << 16) | (0xae00 >> 2),
320         0x00000000,
321         (0x9c00 << 16) | (0xac08 >> 2),
322         0x00000000,
323         (0x9c00 << 16) | (0x88d4 >> 2),
324         0x00000000,
325         (0x9c00 << 16) | (0x88c8 >> 2),
326         0x00000000,
327         (0x9c00 << 16) | (0x88cc >> 2),
328         0x00000000,
329         (0x9c00 << 16) | (0x89b0 >> 2),
330         0x00000000,
331         (0x9c00 << 16) | (0x8b10 >> 2),
332         0x00000000,
333         (0x9c00 << 16) | (0x8a14 >> 2),
334         0x00000000,
335         (0x9c00 << 16) | (0x9830 >> 2),
336         0x00000000,
337         (0x9c00 << 16) | (0x9834 >> 2),
338         0x00000000,
339         (0x9c00 << 16) | (0x9838 >> 2),
340         0x00000000,
341         (0x9c00 << 16) | (0x9a10 >> 2),
342         0x00000000,
343         (0x8000 << 16) | (0x9870 >> 2),
344         0x00000000,
345         (0x8000 << 16) | (0x9874 >> 2),
346         0x00000000,
347         (0x8001 << 16) | (0x9870 >> 2),
348         0x00000000,
349         (0x8001 << 16) | (0x9874 >> 2),
350         0x00000000,
351         (0x8040 << 16) | (0x9870 >> 2),
352         0x00000000,
353         (0x8040 << 16) | (0x9874 >> 2),
354         0x00000000,
355         (0x8041 << 16) | (0x9870 >> 2),
356         0x00000000,
357         (0x8041 << 16) | (0x9874 >> 2),
358         0x00000000,
359         0x00000000
360 };
361
/*
 * Tahiti "golden" RLC register table: six rows of three words each, the
 * middle word being 0xffffffff in every row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * applied by a register-sequence helper -- confirm against the code that
 * consumes this table; it is not visible in this part of the file.
 */
362 static const u32 tahiti_golden_rlc_registers[] =
363 {
364         0xc424, 0xffffffff, 0x00601005,
365         0xc47c, 0xffffffff, 0x10104040,
366         0xc488, 0xffffffff, 0x0100000a,
367         0xc314, 0xffffffff, 0x00000800,
368         0xc30c, 0xffffffff, 0x800000f4,
369         0xf4a8, 0xffffffff, 0x00000000
370 };
371
/*
 * Tahiti "golden" register table, written as three words per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples;
 * the exact masking semantics are defined by the consumer of this table,
 * which is not visible in this part of the file -- confirm before changing
 * any row.
 */
372 static const u32 tahiti_golden_registers[] =
373 {
374         0x9a10, 0x00010000, 0x00018208,
375         0x9830, 0xffffffff, 0x00000000,
376         0x9834, 0xf00fffff, 0x00000400,
377         0x9838, 0x0002021c, 0x00020200,
378         0xc78, 0x00000080, 0x00000000,
379         0xd030, 0x000300c0, 0x00800040,
380         0xd830, 0x000300c0, 0x00800040,
381         0x5bb0, 0x000000f0, 0x00000070,
382         0x5bc0, 0x00200000, 0x50100000,
383         0x7030, 0x31000311, 0x00000011,
384         0x277c, 0x00000003, 0x000007ff,
385         0x240c, 0x000007ff, 0x00000000,
386         0x8a14, 0xf000001f, 0x00000007,
387         0x8b24, 0xffffffff, 0x00ffffff,
388         0x8b10, 0x0000ff0f, 0x00000000,
389         0x28a4c, 0x07ffffff, 0x4e000000,
390         0x28350, 0x3f3f3fff, 0x2a00126a,
391         0x30, 0x000000ff, 0x0040,
392         0x34, 0x00000040, 0x00004040,
393         0x9100, 0x07ffffff, 0x03000000,
394         0x8e88, 0x01ff1f3f, 0x00000000,
395         0x8e84, 0x01ff1f3f, 0x00000000,
396         0x9060, 0x0000007f, 0x00000020,
397         0x9508, 0x00010000, 0x00010000,
398         0xac14, 0x00000200, 0x000002fb,
399         0xac10, 0xffffffff, 0x0000543b,
400         0xac0c, 0xffffffff, 0xa9210876,
401         0x88d0, 0xffffffff, 0x000fff40,
402         0x88d4, 0x0000001f, 0x00000010,
403         0x1410, 0x20000000, 0x20fffed8,
404         0x15c0, 0x000c0fc0, 0x000c0400
405 };
406
/*
 * Second Tahiti "golden" register table; it holds a single three-word row.
 * NOTE(review): presumably an (offset, mask, value) triple like the other
 * golden tables; why it is kept separate is not visible here -- confirm
 * against the caller.
 */
407 static const u32 tahiti_golden_registers2[] =
408 {
409         0xc64, 0x00000001, 0x00000001
410 };
411
/*
 * Pitcairn "golden" RLC register table: five rows of three words each, the
 * middle word being 0xffffffff in every row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * -- confirm against the consumer, which is not visible in this part of the
 * file.
 */
412 static const u32 pitcairn_golden_rlc_registers[] =
413 {
414         0xc424, 0xffffffff, 0x00601004,
415         0xc47c, 0xffffffff, 0x10102020,
416         0xc488, 0xffffffff, 0x01000020,
417         0xc314, 0xffffffff, 0x00000800,
418         0xc30c, 0xffffffff, 0x800000a4
419 };
420
/*
 * Pitcairn "golden" register table, written as three words per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples;
 * the masking semantics live in the consumer of this table, which is not
 * visible in this part of the file -- confirm before changing any row.
 */
421 static const u32 pitcairn_golden_registers[] =
422 {
423         0x9a10, 0x00010000, 0x00018208,
424         0x9830, 0xffffffff, 0x00000000,
425         0x9834, 0xf00fffff, 0x00000400,
426         0x9838, 0x0002021c, 0x00020200,
427         0xc78, 0x00000080, 0x00000000,
428         0xd030, 0x000300c0, 0x00800040,
429         0xd830, 0x000300c0, 0x00800040,
430         0x5bb0, 0x000000f0, 0x00000070,
431         0x5bc0, 0x00200000, 0x50100000,
432         0x7030, 0x31000311, 0x00000011,
433         0x2ae4, 0x00073ffe, 0x000022a2,
434         0x240c, 0x000007ff, 0x00000000,
435         0x8a14, 0xf000001f, 0x00000007,
436         0x8b24, 0xffffffff, 0x00ffffff,
437         0x8b10, 0x0000ff0f, 0x00000000,
438         0x28a4c, 0x07ffffff, 0x4e000000,
439         0x28350, 0x3f3f3fff, 0x2a00126a,
440         0x30, 0x000000ff, 0x0040,
441         0x34, 0x00000040, 0x00004040,
442         0x9100, 0x07ffffff, 0x03000000,
443         0x9060, 0x0000007f, 0x00000020,
444         0x9508, 0x00010000, 0x00010000,
445         0xac14, 0x000003ff, 0x000000f7,
446         0xac10, 0xffffffff, 0x00000000,
447         0xac0c, 0xffffffff, 0x32761054,
448         0x88d4, 0x0000001f, 0x00000010,
449         0x15c0, 0x000c0fc0, 0x000c0400
450 };
451
/*
 * Verde "golden" RLC register table: five rows of three words each, the
 * middle word being 0xffffffff in every row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * -- confirm against the consumer, which is not visible in this part of the
 * file.
 */
452 static const u32 verde_golden_rlc_registers[] =
453 {
454         0xc424, 0xffffffff, 0x033f1005,
455         0xc47c, 0xffffffff, 0x10808020,
456         0xc488, 0xffffffff, 0x00800008,
457         0xc314, 0xffffffff, 0x00001000,
458         0xc30c, 0xffffffff, 0x80010014
459 };
460
/*
 * Verde "golden" register table, written as three words per row.
 *
 * Unlike the sibling tables for the other chips, many rows here are repeated
 * verbatim two or three times in a row (for example 0xd030, 0xd830, 0x2ae4,
 * 0x240c, 0x8a14, 0x28350, 0x9100, 0x8e88, 0x8e84, 0xac14, 0xac10, 0xac0c
 * and 0x88d4).
 *
 * NOTE(review): rows are presumably (register offset, mask, value) triples.
 * Whether the repeated rows are intentional cannot be determined from this
 * part of the file -- confirm against the consumer before deduplicating.
 */
461 static const u32 verde_golden_registers[] =
462 {
463         0x9a10, 0x00010000, 0x00018208,
464         0x9830, 0xffffffff, 0x00000000,
465         0x9834, 0xf00fffff, 0x00000400,
466         0x9838, 0x0002021c, 0x00020200,
467         0xc78, 0x00000080, 0x00000000,
468         0xd030, 0x000300c0, 0x00800040,
469         0xd030, 0x000300c0, 0x00800040,
470         0xd830, 0x000300c0, 0x00800040,
471         0xd830, 0x000300c0, 0x00800040,
472         0x5bb0, 0x000000f0, 0x00000070,
473         0x5bc0, 0x00200000, 0x50100000,
474         0x7030, 0x31000311, 0x00000011,
475         0x2ae4, 0x00073ffe, 0x000022a2,
476         0x2ae4, 0x00073ffe, 0x000022a2,
477         0x2ae4, 0x00073ffe, 0x000022a2,
478         0x240c, 0x000007ff, 0x00000000,
479         0x240c, 0x000007ff, 0x00000000,
480         0x240c, 0x000007ff, 0x00000000,
481         0x8a14, 0xf000001f, 0x00000007,
482         0x8a14, 0xf000001f, 0x00000007,
483         0x8a14, 0xf000001f, 0x00000007,
484         0x8b24, 0xffffffff, 0x00ffffff,
485         0x8b10, 0x0000ff0f, 0x00000000,
486         0x28a4c, 0x07ffffff, 0x4e000000,
487         0x28350, 0x3f3f3fff, 0x0000124a,
488         0x28350, 0x3f3f3fff, 0x0000124a,
489         0x28350, 0x3f3f3fff, 0x0000124a,
490         0x30, 0x000000ff, 0x0040,
491         0x34, 0x00000040, 0x00004040,
492         0x9100, 0x07ffffff, 0x03000000,
493         0x9100, 0x07ffffff, 0x03000000,
494         0x8e88, 0x01ff1f3f, 0x00000000,
495         0x8e88, 0x01ff1f3f, 0x00000000,
496         0x8e88, 0x01ff1f3f, 0x00000000,
497         0x8e84, 0x01ff1f3f, 0x00000000,
498         0x8e84, 0x01ff1f3f, 0x00000000,
499         0x8e84, 0x01ff1f3f, 0x00000000,
500         0x9060, 0x0000007f, 0x00000020,
501         0x9508, 0x00010000, 0x00010000,
502         0xac14, 0x000003ff, 0x00000003,
503         0xac14, 0x000003ff, 0x00000003,
504         0xac14, 0x000003ff, 0x00000003,
505         0xac10, 0xffffffff, 0x00000000,
506         0xac10, 0xffffffff, 0x00000000,
507         0xac10, 0xffffffff, 0x00000000,
508         0xac0c, 0xffffffff, 0x00001032,
509         0xac0c, 0xffffffff, 0x00001032,
510         0xac0c, 0xffffffff, 0x00001032,
511         0x88d4, 0x0000001f, 0x00000010,
512         0x88d4, 0x0000001f, 0x00000010,
513         0x88d4, 0x0000001f, 0x00000010,
514         0x15c0, 0x000c0fc0, 0x000c0400
515 };
516
/*
 * Oland "golden" RLC register table: five rows of three words each, the
 * middle word being 0xffffffff in every row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * -- confirm against the consumer, which is not visible in this part of the
 * file.
 */
517 static const u32 oland_golden_rlc_registers[] =
518 {
519         0xc424, 0xffffffff, 0x00601005,
520         0xc47c, 0xffffffff, 0x10104040,
521         0xc488, 0xffffffff, 0x0100000a,
522         0xc314, 0xffffffff, 0x00000800,
523         0xc30c, 0xffffffff, 0x800000f4
524 };
525
/*
 * Oland "golden" register table, written as three words per row.
 * NOTE(review): rows are presumably (register offset, mask, value) triples;
 * the masking semantics live in the consumer of this table, which is not
 * visible in this part of the file -- confirm before changing any row.
 */
526 static const u32 oland_golden_registers[] =
527 {
528         0x9a10, 0x00010000, 0x00018208,
529         0x9830, 0xffffffff, 0x00000000,
530         0x9834, 0xf00fffff, 0x00000400,
531         0x9838, 0x0002021c, 0x00020200,
532         0xc78, 0x00000080, 0x00000000,
533         0xd030, 0x000300c0, 0x00800040,
534         0xd830, 0x000300c0, 0x00800040,
535         0x5bb0, 0x000000f0, 0x00000070,
536         0x5bc0, 0x00200000, 0x50100000,
537         0x7030, 0x31000311, 0x00000011,
538         0x2ae4, 0x00073ffe, 0x000022a2,
539         0x240c, 0x000007ff, 0x00000000,
540         0x8a14, 0xf000001f, 0x00000007,
541         0x8b24, 0xffffffff, 0x00ffffff,
542         0x8b10, 0x0000ff0f, 0x00000000,
543         0x28a4c, 0x07ffffff, 0x4e000000,
544         0x28350, 0x3f3f3fff, 0x00000082,
545         0x30, 0x000000ff, 0x0040,
546         0x34, 0x00000040, 0x00004040,
547         0x9100, 0x07ffffff, 0x03000000,
548         0x9060, 0x0000007f, 0x00000020,
549         0x9508, 0x00010000, 0x00010000,
550         0xac14, 0x000003ff, 0x000000f3,
551         0xac10, 0xffffffff, 0x00000000,
552         0xac0c, 0xffffffff, 0x00003210,
553         0x88d4, 0x0000001f, 0x00000010,
554         0x15c0, 0x000c0fc0, 0x000c0400
555 };
556
/*
 * Hainan "golden" register table, written as three words per row.  Compared
 * with the sibling tables for the other chips it has no 0xc78, 0x5bb0,
 * 0x5bc0 or 0x7030 rows and instead carries 0xd0c0 and 0xd8c0 rows.
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * -- confirm against the consumer, which is not visible in this part of the
 * file.
 */
557 static const u32 hainan_golden_registers[] =
558 {
559         0x9a10, 0x00010000, 0x00018208,
560         0x9830, 0xffffffff, 0x00000000,
561         0x9834, 0xf00fffff, 0x00000400,
562         0x9838, 0x0002021c, 0x00020200,
563         0xd0c0, 0xff000fff, 0x00000100,
564         0xd030, 0x000300c0, 0x00800040,
565         0xd8c0, 0xff000fff, 0x00000100,
566         0xd830, 0x000300c0, 0x00800040,
567         0x2ae4, 0x00073ffe, 0x000022a2,
568         0x240c, 0x000007ff, 0x00000000,
569         0x8a14, 0xf000001f, 0x00000007,
570         0x8b24, 0xffffffff, 0x00ffffff,
571         0x8b10, 0x0000ff0f, 0x00000000,
572         0x28a4c, 0x07ffffff, 0x4e000000,
573         0x28350, 0x3f3f3fff, 0x00000000,
574         0x30, 0x000000ff, 0x0040,
575         0x34, 0x00000040, 0x00004040,
576         0x9100, 0x03e00000, 0x03600000,
577         0x9060, 0x0000007f, 0x00000020,
578         0x9508, 0x00010000, 0x00010000,
579         0xac14, 0x000003ff, 0x000000f1,
580         0xac10, 0xffffffff, 0x00000000,
581         0xac0c, 0xffffffff, 0x00003210,
582         0x88d4, 0x0000001f, 0x00000010,
583         0x15c0, 0x000c0fc0, 0x000c0400
584 };
585
/*
 * Second Hainan "golden" register table; it holds a single three-word row.
 * NOTE(review): presumably an (offset, mask, value) triple like the other
 * golden tables; why it is kept separate is not visible here -- confirm
 * against the caller.
 */
586 static const u32 hainan_golden_registers2[] =
587 {
588         0x98f8, 0xffffffff, 0x02010001
589 };
590
/*
 * Tahiti MGCG/CGCG init table (per the array name), written as three words
 * per row.
 *
 * Structure visible in the data: the 0x802c row appears twice with the same
 * value, once near the top and once immediately before a run of rows that
 * covers 0x9160 through 0x929c in consecutive 4-byte steps.
 *
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * used to set up clock gating.  The row order and the repeated 0x802c row
 * may matter to the hardware -- confirm against the consumer before
 * reordering or deduplicating anything.
 */
591 static const u32 tahiti_mgcg_cgcg_init[] =
592 {
593         0xc400, 0xffffffff, 0xfffffffc,
594         0x802c, 0xffffffff, 0xe0000000,
595         0x9a60, 0xffffffff, 0x00000100,
596         0x92a4, 0xffffffff, 0x00000100,
597         0xc164, 0xffffffff, 0x00000100,
598         0x9774, 0xffffffff, 0x00000100,
599         0x8984, 0xffffffff, 0x06000100,
600         0x8a18, 0xffffffff, 0x00000100,
601         0x92a0, 0xffffffff, 0x00000100,
602         0xc380, 0xffffffff, 0x00000100,
603         0x8b28, 0xffffffff, 0x00000100,
604         0x9144, 0xffffffff, 0x00000100,
605         0x8d88, 0xffffffff, 0x00000100,
606         0x8d8c, 0xffffffff, 0x00000100,
607         0x9030, 0xffffffff, 0x00000100,
608         0x9034, 0xffffffff, 0x00000100,
609         0x9038, 0xffffffff, 0x00000100,
610         0x903c, 0xffffffff, 0x00000100,
611         0xad80, 0xffffffff, 0x00000100,
612         0xac54, 0xffffffff, 0x00000100,
613         0x897c, 0xffffffff, 0x06000100,
614         0x9868, 0xffffffff, 0x00000100,
615         0x9510, 0xffffffff, 0x00000100,
616         0xaf04, 0xffffffff, 0x00000100,
617         0xae04, 0xffffffff, 0x00000100,
618         0x949c, 0xffffffff, 0x00000100,
619         0x802c, 0xffffffff, 0xe0000000,
620         0x9160, 0xffffffff, 0x00010000,
621         0x9164, 0xffffffff, 0x00030002,
622         0x9168, 0xffffffff, 0x00040007,
623         0x916c, 0xffffffff, 0x00060005,
624         0x9170, 0xffffffff, 0x00090008,
625         0x9174, 0xffffffff, 0x00020001,
626         0x9178, 0xffffffff, 0x00040003,
627         0x917c, 0xffffffff, 0x00000007,
628         0x9180, 0xffffffff, 0x00060005,
629         0x9184, 0xffffffff, 0x00090008,
630         0x9188, 0xffffffff, 0x00030002,
631         0x918c, 0xffffffff, 0x00050004,
632         0x9190, 0xffffffff, 0x00000008,
633         0x9194, 0xffffffff, 0x00070006,
634         0x9198, 0xffffffff, 0x000a0009,
635         0x919c, 0xffffffff, 0x00040003,
636         0x91a0, 0xffffffff, 0x00060005,
637         0x91a4, 0xffffffff, 0x00000009,
638         0x91a8, 0xffffffff, 0x00080007,
639         0x91ac, 0xffffffff, 0x000b000a,
640         0x91b0, 0xffffffff, 0x00050004,
641         0x91b4, 0xffffffff, 0x00070006,
642         0x91b8, 0xffffffff, 0x0008000b,
643         0x91bc, 0xffffffff, 0x000a0009,
644         0x91c0, 0xffffffff, 0x000d000c,
645         0x91c4, 0xffffffff, 0x00060005,
646         0x91c8, 0xffffffff, 0x00080007,
647         0x91cc, 0xffffffff, 0x0000000b,
648         0x91d0, 0xffffffff, 0x000a0009,
649         0x91d4, 0xffffffff, 0x000d000c,
650         0x91d8, 0xffffffff, 0x00070006,
651         0x91dc, 0xffffffff, 0x00090008,
652         0x91e0, 0xffffffff, 0x0000000c,
653         0x91e4, 0xffffffff, 0x000b000a,
654         0x91e8, 0xffffffff, 0x000e000d,
655         0x91ec, 0xffffffff, 0x00080007,
656         0x91f0, 0xffffffff, 0x000a0009,
657         0x91f4, 0xffffffff, 0x0000000d,
658         0x91f8, 0xffffffff, 0x000c000b,
659         0x91fc, 0xffffffff, 0x000f000e,
660         0x9200, 0xffffffff, 0x00090008,
661         0x9204, 0xffffffff, 0x000b000a,
662         0x9208, 0xffffffff, 0x000c000f,
663         0x920c, 0xffffffff, 0x000e000d,
664         0x9210, 0xffffffff, 0x00110010,
665         0x9214, 0xffffffff, 0x000a0009,
666         0x9218, 0xffffffff, 0x000c000b,
667         0x921c, 0xffffffff, 0x0000000f,
668         0x9220, 0xffffffff, 0x000e000d,
669         0x9224, 0xffffffff, 0x00110010,
670         0x9228, 0xffffffff, 0x000b000a,
671         0x922c, 0xffffffff, 0x000d000c,
672         0x9230, 0xffffffff, 0x00000010,
673         0x9234, 0xffffffff, 0x000f000e,
674         0x9238, 0xffffffff, 0x00120011,
675         0x923c, 0xffffffff, 0x000c000b,
676         0x9240, 0xffffffff, 0x000e000d,
677         0x9244, 0xffffffff, 0x00000011,
678         0x9248, 0xffffffff, 0x0010000f,
679         0x924c, 0xffffffff, 0x00130012,
680         0x9250, 0xffffffff, 0x000d000c,
681         0x9254, 0xffffffff, 0x000f000e,
682         0x9258, 0xffffffff, 0x00100013,
683         0x925c, 0xffffffff, 0x00120011,
684         0x9260, 0xffffffff, 0x00150014,
685         0x9264, 0xffffffff, 0x000e000d,
686         0x9268, 0xffffffff, 0x0010000f,
687         0x926c, 0xffffffff, 0x00000013,
688         0x9270, 0xffffffff, 0x00120011,
689         0x9274, 0xffffffff, 0x00150014,
690         0x9278, 0xffffffff, 0x000f000e,
691         0x927c, 0xffffffff, 0x00110010,
692         0x9280, 0xffffffff, 0x00000014,
693         0x9284, 0xffffffff, 0x00130012,
694         0x9288, 0xffffffff, 0x00160015,
695         0x928c, 0xffffffff, 0x0010000f,
696         0x9290, 0xffffffff, 0x00120011,
697         0x9294, 0xffffffff, 0x00000015,
698         0x9298, 0xffffffff, 0x00140013,
699         0x929c, 0xffffffff, 0x00170016,
700         0x9150, 0xffffffff, 0x96940200,
701         0x8708, 0xffffffff, 0x00900100,
702         0xc478, 0xffffffff, 0x00000080,
703         0xc404, 0xffffffff, 0x0020003f,
704         0x30, 0xffffffff, 0x0000001c,
705         0x34, 0x000f0000, 0x000f0000,
706         0x160c, 0xffffffff, 0x00000100,
707         0x1024, 0xffffffff, 0x00000100,
708         0x102c, 0x00000101, 0x00000000,
709         0x20a8, 0xffffffff, 0x00000104,
710         0x264c, 0x000c0000, 0x000c0000,
711         0x2648, 0x000c0000, 0x000c0000,
712         0x55e4, 0xff000fff, 0x00000100,
713         0x55e8, 0x00000001, 0x00000001,
714         0x2f50, 0x00000001, 0x00000001,
715         0x30cc, 0xc0000fff, 0x00000104,
716         0xc1e4, 0x00000001, 0x00000001,
717         0xd0c0, 0xfffffff0, 0x00000100,
718         0xd8c0, 0xfffffff0, 0x00000100
719 };
720
/*
 * Pitcairn MGCG/CGCG init table (per the array name), written as three words
 * per row.
 *
 * Structure visible in the data: the 0x802c row appears twice with the same
 * value.  After the second one, the consecutive-offset run covers 0x9160
 * through 0x91c0, then resumes at 0x9200 and stops at 0x9260; offsets
 * 0x91c4-0x91fc and 0x9264-0x929c have no rows in this table.  There are
 * also no 0x264c or 0x2648 rows.
 *
 * NOTE(review): rows are presumably (register offset, mask, value) triples
 * used to set up clock gating.  The row order and the repeated 0x802c row
 * may matter to the hardware -- confirm against the consumer before
 * reordering or deduplicating anything.
 */
721 static const u32 pitcairn_mgcg_cgcg_init[] =
722 {
723         0xc400, 0xffffffff, 0xfffffffc,
724         0x802c, 0xffffffff, 0xe0000000,
725         0x9a60, 0xffffffff, 0x00000100,
726         0x92a4, 0xffffffff, 0x00000100,
727         0xc164, 0xffffffff, 0x00000100,
728         0x9774, 0xffffffff, 0x00000100,
729         0x8984, 0xffffffff, 0x06000100,
730         0x8a18, 0xffffffff, 0x00000100,
731         0x92a0, 0xffffffff, 0x00000100,
732         0xc380, 0xffffffff, 0x00000100,
733         0x8b28, 0xffffffff, 0x00000100,
734         0x9144, 0xffffffff, 0x00000100,
735         0x8d88, 0xffffffff, 0x00000100,
736         0x8d8c, 0xffffffff, 0x00000100,
737         0x9030, 0xffffffff, 0x00000100,
738         0x9034, 0xffffffff, 0x00000100,
739         0x9038, 0xffffffff, 0x00000100,
740         0x903c, 0xffffffff, 0x00000100,
741         0xad80, 0xffffffff, 0x00000100,
742         0xac54, 0xffffffff, 0x00000100,
743         0x897c, 0xffffffff, 0x06000100,
744         0x9868, 0xffffffff, 0x00000100,
745         0x9510, 0xffffffff, 0x00000100,
746         0xaf04, 0xffffffff, 0x00000100,
747         0xae04, 0xffffffff, 0x00000100,
748         0x949c, 0xffffffff, 0x00000100,
749         0x802c, 0xffffffff, 0xe0000000,
750         0x9160, 0xffffffff, 0x00010000,
751         0x9164, 0xffffffff, 0x00030002,
752         0x9168, 0xffffffff, 0x00040007,
753         0x916c, 0xffffffff, 0x00060005,
754         0x9170, 0xffffffff, 0x00090008,
755         0x9174, 0xffffffff, 0x00020001,
756         0x9178, 0xffffffff, 0x00040003,
757         0x917c, 0xffffffff, 0x00000007,
758         0x9180, 0xffffffff, 0x00060005,
759         0x9184, 0xffffffff, 0x00090008,
760         0x9188, 0xffffffff, 0x00030002,
761         0x918c, 0xffffffff, 0x00050004,
762         0x9190, 0xffffffff, 0x00000008,
763         0x9194, 0xffffffff, 0x00070006,
764         0x9198, 0xffffffff, 0x000a0009,
765         0x919c, 0xffffffff, 0x00040003,
766         0x91a0, 0xffffffff, 0x00060005,
767         0x91a4, 0xffffffff, 0x00000009,
768         0x91a8, 0xffffffff, 0x00080007,
769         0x91ac, 0xffffffff, 0x000b000a,
770         0x91b0, 0xffffffff, 0x00050004,
771         0x91b4, 0xffffffff, 0x00070006,
772         0x91b8, 0xffffffff, 0x0008000b,
773         0x91bc, 0xffffffff, 0x000a0009,
774         0x91c0, 0xffffffff, 0x000d000c,
775         0x9200, 0xffffffff, 0x00090008,
776         0x9204, 0xffffffff, 0x000b000a,
777         0x9208, 0xffffffff, 0x000c000f,
778         0x920c, 0xffffffff, 0x000e000d,
779         0x9210, 0xffffffff, 0x00110010,
780         0x9214, 0xffffffff, 0x000a0009,
781         0x9218, 0xffffffff, 0x000c000b,
782         0x921c, 0xffffffff, 0x0000000f,
783         0x9220, 0xffffffff, 0x000e000d,
784         0x9224, 0xffffffff, 0x00110010,
785         0x9228, 0xffffffff, 0x000b000a,
786         0x922c, 0xffffffff, 0x000d000c,
787         0x9230, 0xffffffff, 0x00000010,
788         0x9234, 0xffffffff, 0x000f000e,
789         0x9238, 0xffffffff, 0x00120011,
790         0x923c, 0xffffffff, 0x000c000b,
791         0x9240, 0xffffffff, 0x000e000d,
792         0x9244, 0xffffffff, 0x00000011,
793         0x9248, 0xffffffff, 0x0010000f,
794         0x924c, 0xffffffff, 0x00130012,
795         0x9250, 0xffffffff, 0x000d000c,
796         0x9254, 0xffffffff, 0x000f000e,
797         0x9258, 0xffffffff, 0x00100013,
798         0x925c, 0xffffffff, 0x00120011,
799         0x9260, 0xffffffff, 0x00150014,
800         0x9150, 0xffffffff, 0x96940200,
801         0x8708, 0xffffffff, 0x00900100,
802         0xc478, 0xffffffff, 0x00000080,
803         0xc404, 0xffffffff, 0x0020003f,
804         0x30, 0xffffffff, 0x0000001c,
805         0x34, 0x000f0000, 0x000f0000,
806         0x160c, 0xffffffff, 0x00000100,
807         0x1024, 0xffffffff, 0x00000100,
808         0x102c, 0x00000101, 0x00000000,
809         0x20a8, 0xffffffff, 0x00000104,
810         0x55e4, 0xff000fff, 0x00000100,
811         0x55e8, 0x00000001, 0x00000001,
812         0x2f50, 0x00000001, 0x00000001,
813         0x30cc, 0xc0000fff, 0x00000104,
814         0xc1e4, 0x00000001, 0x00000001,
815         0xd0c0, 0xfffffff0, 0x00000100,
816         0xd8c0, 0xfffffff0, 0x00000100
817 };
818
/*
 * Register table programmed for CHIP_VERDE by si_init_golden_registers(),
 * which hands it (with its ARRAY_SIZE) to radeon_program_register_sequence().
 *
 * Each row holds three values; they appear to be (register offset, mask,
 * value) triples -- TODO confirm against radeon_program_register_sequence().
 * The "mgcg_cgcg" in the name presumably refers to clock gating setup;
 * verify against the hardware documentation.
 *
 * NOTE(review): offset 0x802c is listed twice with the same value, so the
 * order of entries is presumably significant; do not sort or deduplicate.
 */
819 static const u32 verde_mgcg_cgcg_init[] =
820 {
821         0xc400, 0xffffffff, 0xfffffffc,
822         0x802c, 0xffffffff, 0xe0000000,
823         0x9a60, 0xffffffff, 0x00000100,
824         0x92a4, 0xffffffff, 0x00000100,
825         0xc164, 0xffffffff, 0x00000100,
826         0x9774, 0xffffffff, 0x00000100,
827         0x8984, 0xffffffff, 0x06000100,
828         0x8a18, 0xffffffff, 0x00000100,
829         0x92a0, 0xffffffff, 0x00000100,
830         0xc380, 0xffffffff, 0x00000100,
831         0x8b28, 0xffffffff, 0x00000100,
832         0x9144, 0xffffffff, 0x00000100,
833         0x8d88, 0xffffffff, 0x00000100,
834         0x8d8c, 0xffffffff, 0x00000100,
835         0x9030, 0xffffffff, 0x00000100,
836         0x9034, 0xffffffff, 0x00000100,
837         0x9038, 0xffffffff, 0x00000100,
838         0x903c, 0xffffffff, 0x00000100,
839         0xad80, 0xffffffff, 0x00000100,
840         0xac54, 0xffffffff, 0x00000100,
841         0x897c, 0xffffffff, 0x06000100,
842         0x9868, 0xffffffff, 0x00000100,
843         0x9510, 0xffffffff, 0x00000100,
844         0xaf04, 0xffffffff, 0x00000100,
845         0xae04, 0xffffffff, 0x00000100,
846         0x949c, 0xffffffff, 0x00000100,
847         0x802c, 0xffffffff, 0xe0000000,
848         0x9160, 0xffffffff, 0x00010000,
849         0x9164, 0xffffffff, 0x00030002,
850         0x9168, 0xffffffff, 0x00040007,
851         0x916c, 0xffffffff, 0x00060005,
852         0x9170, 0xffffffff, 0x00090008,
853         0x9174, 0xffffffff, 0x00020001,
854         0x9178, 0xffffffff, 0x00040003,
855         0x917c, 0xffffffff, 0x00000007,
856         0x9180, 0xffffffff, 0x00060005,
857         0x9184, 0xffffffff, 0x00090008,
858         0x9188, 0xffffffff, 0x00030002,
859         0x918c, 0xffffffff, 0x00050004,
860         0x9190, 0xffffffff, 0x00000008,
861         0x9194, 0xffffffff, 0x00070006,
862         0x9198, 0xffffffff, 0x000a0009,
863         0x919c, 0xffffffff, 0x00040003,
864         0x91a0, 0xffffffff, 0x00060005,
865         0x91a4, 0xffffffff, 0x00000009,
866         0x91a8, 0xffffffff, 0x00080007,
867         0x91ac, 0xffffffff, 0x000b000a,
868         0x91b0, 0xffffffff, 0x00050004,
869         0x91b4, 0xffffffff, 0x00070006,
870         0x91b8, 0xffffffff, 0x0008000b,
871         0x91bc, 0xffffffff, 0x000a0009,
872         0x91c0, 0xffffffff, 0x000d000c,
873         0x9200, 0xffffffff, 0x00090008,
874         0x9204, 0xffffffff, 0x000b000a,
875         0x9208, 0xffffffff, 0x000c000f,
876         0x920c, 0xffffffff, 0x000e000d,
877         0x9210, 0xffffffff, 0x00110010,
878         0x9214, 0xffffffff, 0x000a0009,
879         0x9218, 0xffffffff, 0x000c000b,
880         0x921c, 0xffffffff, 0x0000000f,
881         0x9220, 0xffffffff, 0x000e000d,
882         0x9224, 0xffffffff, 0x00110010,
883         0x9228, 0xffffffff, 0x000b000a,
884         0x922c, 0xffffffff, 0x000d000c,
885         0x9230, 0xffffffff, 0x00000010,
886         0x9234, 0xffffffff, 0x000f000e,
887         0x9238, 0xffffffff, 0x00120011,
888         0x923c, 0xffffffff, 0x000c000b,
889         0x9240, 0xffffffff, 0x000e000d,
890         0x9244, 0xffffffff, 0x00000011,
891         0x9248, 0xffffffff, 0x0010000f,
892         0x924c, 0xffffffff, 0x00130012,
893         0x9250, 0xffffffff, 0x000d000c,
894         0x9254, 0xffffffff, 0x000f000e,
895         0x9258, 0xffffffff, 0x00100013,
896         0x925c, 0xffffffff, 0x00120011,
897         0x9260, 0xffffffff, 0x00150014,
898         0x9150, 0xffffffff, 0x96940200,
899         0x8708, 0xffffffff, 0x00900100,
900         0xc478, 0xffffffff, 0x00000080,
901         0xc404, 0xffffffff, 0x0020003f,
902         0x30, 0xffffffff, 0x0000001c,
903         0x34, 0x000f0000, 0x000f0000,
904         0x160c, 0xffffffff, 0x00000100,
905         0x1024, 0xffffffff, 0x00000100,
906         0x102c, 0x00000101, 0x00000000,
907         0x20a8, 0xffffffff, 0x00000104,
908         0x264c, 0x000c0000, 0x000c0000,
909         0x2648, 0x000c0000, 0x000c0000,
910         0x55e4, 0xff000fff, 0x00000100,
911         0x55e8, 0x00000001, 0x00000001,
912         0x2f50, 0x00000001, 0x00000001,
913         0x30cc, 0xc0000fff, 0x00000104,
914         0xc1e4, 0x00000001, 0x00000001,
915         0xd0c0, 0xfffffff0, 0x00000100,
916         0xd8c0, 0xfffffff0, 0x00000100
917 };
918
/*
 * Register table programmed for CHIP_OLAND by si_init_golden_registers(),
 * which hands it (with its ARRAY_SIZE) to radeon_program_register_sequence().
 *
 * Each row holds three values; they appear to be (register offset, mask,
 * value) triples -- TODO confirm against radeon_program_register_sequence().
 *
 * NOTE(review): offset 0x802c is listed twice with the same value, so the
 * order of entries is presumably significant; do not sort or deduplicate.
 */
919 static const u32 oland_mgcg_cgcg_init[] =
920 {
921         0xc400, 0xffffffff, 0xfffffffc,
922         0x802c, 0xffffffff, 0xe0000000,
923         0x9a60, 0xffffffff, 0x00000100,
924         0x92a4, 0xffffffff, 0x00000100,
925         0xc164, 0xffffffff, 0x00000100,
926         0x9774, 0xffffffff, 0x00000100,
927         0x8984, 0xffffffff, 0x06000100,
928         0x8a18, 0xffffffff, 0x00000100,
929         0x92a0, 0xffffffff, 0x00000100,
930         0xc380, 0xffffffff, 0x00000100,
931         0x8b28, 0xffffffff, 0x00000100,
932         0x9144, 0xffffffff, 0x00000100,
933         0x8d88, 0xffffffff, 0x00000100,
934         0x8d8c, 0xffffffff, 0x00000100,
935         0x9030, 0xffffffff, 0x00000100,
936         0x9034, 0xffffffff, 0x00000100,
937         0x9038, 0xffffffff, 0x00000100,
938         0x903c, 0xffffffff, 0x00000100,
939         0xad80, 0xffffffff, 0x00000100,
940         0xac54, 0xffffffff, 0x00000100,
941         0x897c, 0xffffffff, 0x06000100,
942         0x9868, 0xffffffff, 0x00000100,
943         0x9510, 0xffffffff, 0x00000100,
944         0xaf04, 0xffffffff, 0x00000100,
945         0xae04, 0xffffffff, 0x00000100,
946         0x949c, 0xffffffff, 0x00000100,
947         0x802c, 0xffffffff, 0xe0000000,
948         0x9160, 0xffffffff, 0x00010000,
949         0x9164, 0xffffffff, 0x00030002,
950         0x9168, 0xffffffff, 0x00040007,
951         0x916c, 0xffffffff, 0x00060005,
952         0x9170, 0xffffffff, 0x00090008,
953         0x9174, 0xffffffff, 0x00020001,
954         0x9178, 0xffffffff, 0x00040003,
955         0x917c, 0xffffffff, 0x00000007,
956         0x9180, 0xffffffff, 0x00060005,
957         0x9184, 0xffffffff, 0x00090008,
958         0x9188, 0xffffffff, 0x00030002,
959         0x918c, 0xffffffff, 0x00050004,
960         0x9190, 0xffffffff, 0x00000008,
961         0x9194, 0xffffffff, 0x00070006,
962         0x9198, 0xffffffff, 0x000a0009,
963         0x919c, 0xffffffff, 0x00040003,
964         0x91a0, 0xffffffff, 0x00060005,
965         0x91a4, 0xffffffff, 0x00000009,
966         0x91a8, 0xffffffff, 0x00080007,
967         0x91ac, 0xffffffff, 0x000b000a,
968         0x91b0, 0xffffffff, 0x00050004,
969         0x91b4, 0xffffffff, 0x00070006,
970         0x91b8, 0xffffffff, 0x0008000b,
971         0x91bc, 0xffffffff, 0x000a0009,
972         0x91c0, 0xffffffff, 0x000d000c,
973         0x91c4, 0xffffffff, 0x00060005,
974         0x91c8, 0xffffffff, 0x00080007,
975         0x91cc, 0xffffffff, 0x0000000b,
976         0x91d0, 0xffffffff, 0x000a0009,
977         0x91d4, 0xffffffff, 0x000d000c,
978         0x9150, 0xffffffff, 0x96940200,
979         0x8708, 0xffffffff, 0x00900100,
980         0xc478, 0xffffffff, 0x00000080,
981         0xc404, 0xffffffff, 0x0020003f,
982         0x30, 0xffffffff, 0x0000001c,
983         0x34, 0x000f0000, 0x000f0000,
984         0x160c, 0xffffffff, 0x00000100,
985         0x1024, 0xffffffff, 0x00000100,
986         0x102c, 0x00000101, 0x00000000,
987         0x20a8, 0xffffffff, 0x00000104,
988         0x264c, 0x000c0000, 0x000c0000,
989         0x2648, 0x000c0000, 0x000c0000,
990         0x55e4, 0xff000fff, 0x00000100,
991         0x55e8, 0x00000001, 0x00000001,
992         0x2f50, 0x00000001, 0x00000001,
993         0x30cc, 0xc0000fff, 0x00000104,
994         0xc1e4, 0x00000001, 0x00000001,
995         0xd0c0, 0xfffffff0, 0x00000100,
996         0xd8c0, 0xfffffff0, 0x00000100
997 };
998
/*
 * Register table programmed for CHIP_HAINAN by si_init_golden_registers(),
 * which hands it (with its ARRAY_SIZE) to radeon_program_register_sequence().
 *
 * Each row holds three values; they appear to be (register offset, mask,
 * value) triples -- TODO confirm against radeon_program_register_sequence().
 *
 * Compared with oland_mgcg_cgcg_init, this table has no rows for offsets
 * 0x102c, 0x55e4 and 0x55e8.
 *
 * NOTE(review): offset 0x802c is listed twice with the same value, so the
 * order of entries is presumably significant; do not sort or deduplicate.
 */
999 static const u32 hainan_mgcg_cgcg_init[] =
1000 {
1001         0xc400, 0xffffffff, 0xfffffffc,
1002         0x802c, 0xffffffff, 0xe0000000,
1003         0x9a60, 0xffffffff, 0x00000100,
1004         0x92a4, 0xffffffff, 0x00000100,
1005         0xc164, 0xffffffff, 0x00000100,
1006         0x9774, 0xffffffff, 0x00000100,
1007         0x8984, 0xffffffff, 0x06000100,
1008         0x8a18, 0xffffffff, 0x00000100,
1009         0x92a0, 0xffffffff, 0x00000100,
1010         0xc380, 0xffffffff, 0x00000100,
1011         0x8b28, 0xffffffff, 0x00000100,
1012         0x9144, 0xffffffff, 0x00000100,
1013         0x8d88, 0xffffffff, 0x00000100,
1014         0x8d8c, 0xffffffff, 0x00000100,
1015         0x9030, 0xffffffff, 0x00000100,
1016         0x9034, 0xffffffff, 0x00000100,
1017         0x9038, 0xffffffff, 0x00000100,
1018         0x903c, 0xffffffff, 0x00000100,
1019         0xad80, 0xffffffff, 0x00000100,
1020         0xac54, 0xffffffff, 0x00000100,
1021         0x897c, 0xffffffff, 0x06000100,
1022         0x9868, 0xffffffff, 0x00000100,
1023         0x9510, 0xffffffff, 0x00000100,
1024         0xaf04, 0xffffffff, 0x00000100,
1025         0xae04, 0xffffffff, 0x00000100,
1026         0x949c, 0xffffffff, 0x00000100,
1027         0x802c, 0xffffffff, 0xe0000000,
1028         0x9160, 0xffffffff, 0x00010000,
1029         0x9164, 0xffffffff, 0x00030002,
1030         0x9168, 0xffffffff, 0x00040007,
1031         0x916c, 0xffffffff, 0x00060005,
1032         0x9170, 0xffffffff, 0x00090008,
1033         0x9174, 0xffffffff, 0x00020001,
1034         0x9178, 0xffffffff, 0x00040003,
1035         0x917c, 0xffffffff, 0x00000007,
1036         0x9180, 0xffffffff, 0x00060005,
1037         0x9184, 0xffffffff, 0x00090008,
1038         0x9188, 0xffffffff, 0x00030002,
1039         0x918c, 0xffffffff, 0x00050004,
1040         0x9190, 0xffffffff, 0x00000008,
1041         0x9194, 0xffffffff, 0x00070006,
1042         0x9198, 0xffffffff, 0x000a0009,
1043         0x919c, 0xffffffff, 0x00040003,
1044         0x91a0, 0xffffffff, 0x00060005,
1045         0x91a4, 0xffffffff, 0x00000009,
1046         0x91a8, 0xffffffff, 0x00080007,
1047         0x91ac, 0xffffffff, 0x000b000a,
1048         0x91b0, 0xffffffff, 0x00050004,
1049         0x91b4, 0xffffffff, 0x00070006,
1050         0x91b8, 0xffffffff, 0x0008000b,
1051         0x91bc, 0xffffffff, 0x000a0009,
1052         0x91c0, 0xffffffff, 0x000d000c,
1053         0x91c4, 0xffffffff, 0x00060005,
1054         0x91c8, 0xffffffff, 0x00080007,
1055         0x91cc, 0xffffffff, 0x0000000b,
1056         0x91d0, 0xffffffff, 0x000a0009,
1057         0x91d4, 0xffffffff, 0x000d000c,
1058         0x9150, 0xffffffff, 0x96940200,
1059         0x8708, 0xffffffff, 0x00900100,
1060         0xc478, 0xffffffff, 0x00000080,
1061         0xc404, 0xffffffff, 0x0020003f,
1062         0x30, 0xffffffff, 0x0000001c,
1063         0x34, 0x000f0000, 0x000f0000,
1064         0x160c, 0xffffffff, 0x00000100,
1065         0x1024, 0xffffffff, 0x00000100,
1066         0x20a8, 0xffffffff, 0x00000104,
1067         0x264c, 0x000c0000, 0x000c0000,
1068         0x2648, 0x000c0000, 0x000c0000,
1069         0x2f50, 0x00000001, 0x00000001,
1070         0x30cc, 0xc0000fff, 0x00000104,
1071         0xc1e4, 0x00000001, 0x00000001,
1072         0xd0c0, 0xfffffff0, 0x00000100,
1073         0xd8c0, 0xfffffff0, 0x00000100
1074 };
1075
1076 static u32 verde_pg_init[] =
1077 {
1078         0x353c, 0xffffffff, 0x40000,
1079         0x3538, 0xffffffff, 0x200010ff,
1080         0x353c, 0xffffffff, 0x0,
1081         0x353c, 0xffffffff, 0x0,
1082         0x353c, 0xffffffff, 0x0,
1083         0x353c, 0xffffffff, 0x0,
1084         0x353c, 0xffffffff, 0x0,
1085         0x353c, 0xffffffff, 0x7007,
1086         0x3538, 0xffffffff, 0x300010ff,
1087         0x353c, 0xffffffff, 0x0,
1088         0x353c, 0xffffffff, 0x0,
1089         0x353c, 0xffffffff, 0x0,
1090         0x353c, 0xffffffff, 0x0,
1091         0x353c, 0xffffffff, 0x0,
1092         0x353c, 0xffffffff, 0x400000,
1093         0x3538, 0xffffffff, 0x100010ff,
1094         0x353c, 0xffffffff, 0x0,
1095         0x353c, 0xffffffff, 0x0,
1096         0x353c, 0xffffffff, 0x0,
1097         0x353c, 0xffffffff, 0x0,
1098         0x353c, 0xffffffff, 0x0,
1099         0x353c, 0xffffffff, 0x120200,
1100         0x3538, 0xffffffff, 0x500010ff,
1101         0x353c, 0xffffffff, 0x0,
1102         0x353c, 0xffffffff, 0x0,
1103         0x353c, 0xffffffff, 0x0,
1104         0x353c, 0xffffffff, 0x0,
1105         0x353c, 0xffffffff, 0x0,
1106         0x353c, 0xffffffff, 0x1e1e16,
1107         0x3538, 0xffffffff, 0x600010ff,
1108         0x353c, 0xffffffff, 0x0,
1109         0x353c, 0xffffffff, 0x0,
1110         0x353c, 0xffffffff, 0x0,
1111         0x353c, 0xffffffff, 0x0,
1112         0x353c, 0xffffffff, 0x0,
1113         0x353c, 0xffffffff, 0x171f1e,
1114         0x3538, 0xffffffff, 0x700010ff,
1115         0x353c, 0xffffffff, 0x0,
1116         0x353c, 0xffffffff, 0x0,
1117         0x353c, 0xffffffff, 0x0,
1118         0x353c, 0xffffffff, 0x0,
1119         0x353c, 0xffffffff, 0x0,
1120         0x353c, 0xffffffff, 0x0,
1121         0x3538, 0xffffffff, 0x9ff,
1122         0x3500, 0xffffffff, 0x0,
1123         0x3504, 0xffffffff, 0x10000800,
1124         0x3504, 0xffffffff, 0xf,
1125         0x3504, 0xffffffff, 0xf,
1126         0x3500, 0xffffffff, 0x4,
1127         0x3504, 0xffffffff, 0x1000051e,
1128         0x3504, 0xffffffff, 0xffff,
1129         0x3504, 0xffffffff, 0xffff,
1130         0x3500, 0xffffffff, 0x8,
1131         0x3504, 0xffffffff, 0x80500,
1132         0x3500, 0xffffffff, 0x12,
1133         0x3504, 0xffffffff, 0x9050c,
1134         0x3500, 0xffffffff, 0x1d,
1135         0x3504, 0xffffffff, 0xb052c,
1136         0x3500, 0xffffffff, 0x2a,
1137         0x3504, 0xffffffff, 0x1053e,
1138         0x3500, 0xffffffff, 0x2d,
1139         0x3504, 0xffffffff, 0x10546,
1140         0x3500, 0xffffffff, 0x30,
1141         0x3504, 0xffffffff, 0xa054e,
1142         0x3500, 0xffffffff, 0x3c,
1143         0x3504, 0xffffffff, 0x1055f,
1144         0x3500, 0xffffffff, 0x3f,
1145         0x3504, 0xffffffff, 0x10567,
1146         0x3500, 0xffffffff, 0x42,
1147         0x3504, 0xffffffff, 0x1056f,
1148         0x3500, 0xffffffff, 0x45,
1149         0x3504, 0xffffffff, 0x10572,
1150         0x3500, 0xffffffff, 0x48,
1151         0x3504, 0xffffffff, 0x20575,
1152         0x3500, 0xffffffff, 0x4c,
1153         0x3504, 0xffffffff, 0x190801,
1154         0x3500, 0xffffffff, 0x67,
1155         0x3504, 0xffffffff, 0x1082a,
1156         0x3500, 0xffffffff, 0x6a,
1157         0x3504, 0xffffffff, 0x1b082d,
1158         0x3500, 0xffffffff, 0x87,
1159         0x3504, 0xffffffff, 0x310851,
1160         0x3500, 0xffffffff, 0xba,
1161         0x3504, 0xffffffff, 0x891,
1162         0x3500, 0xffffffff, 0xbc,
1163         0x3504, 0xffffffff, 0x893,
1164         0x3500, 0xffffffff, 0xbe,
1165         0x3504, 0xffffffff, 0x20895,
1166         0x3500, 0xffffffff, 0xc2,
1167         0x3504, 0xffffffff, 0x20899,
1168         0x3500, 0xffffffff, 0xc6,
1169         0x3504, 0xffffffff, 0x2089d,
1170         0x3500, 0xffffffff, 0xca,
1171         0x3504, 0xffffffff, 0x8a1,
1172         0x3500, 0xffffffff, 0xcc,
1173         0x3504, 0xffffffff, 0x8a3,
1174         0x3500, 0xffffffff, 0xce,
1175         0x3504, 0xffffffff, 0x308a5,
1176         0x3500, 0xffffffff, 0xd3,
1177         0x3504, 0xffffffff, 0x6d08cd,
1178         0x3500, 0xffffffff, 0x142,
1179         0x3504, 0xffffffff, 0x2000095a,
1180         0x3504, 0xffffffff, 0x1,
1181         0x3500, 0xffffffff, 0x144,
1182         0x3504, 0xffffffff, 0x301f095b,
1183         0x3500, 0xffffffff, 0x165,
1184         0x3504, 0xffffffff, 0xc094d,
1185         0x3500, 0xffffffff, 0x173,
1186         0x3504, 0xffffffff, 0xf096d,
1187         0x3500, 0xffffffff, 0x184,
1188         0x3504, 0xffffffff, 0x15097f,
1189         0x3500, 0xffffffff, 0x19b,
1190         0x3504, 0xffffffff, 0xc0998,
1191         0x3500, 0xffffffff, 0x1a9,
1192         0x3504, 0xffffffff, 0x409a7,
1193         0x3500, 0xffffffff, 0x1af,
1194         0x3504, 0xffffffff, 0xcdc,
1195         0x3500, 0xffffffff, 0x1b1,
1196         0x3504, 0xffffffff, 0x800,
1197         0x3508, 0xffffffff, 0x6c9b2000,
1198         0x3510, 0xfc00, 0x2000,
1199         0x3544, 0xffffffff, 0xfc0,
1200         0x28d4, 0x00000100, 0x100
1201 };
1202
1203 static void si_init_golden_registers(struct radeon_device *rdev)
1204 {
1205         switch (rdev->family) {
1206         case CHIP_TAHITI:
1207                 radeon_program_register_sequence(rdev,
1208                                                  tahiti_golden_registers,
1209                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers));
1210                 radeon_program_register_sequence(rdev,
1211                                                  tahiti_golden_rlc_registers,
1212                                                  (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1213                 radeon_program_register_sequence(rdev,
1214                                                  tahiti_mgcg_cgcg_init,
1215                                                  (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1216                 radeon_program_register_sequence(rdev,
1217                                                  tahiti_golden_registers2,
1218                                                  (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1219                 break;
1220         case CHIP_PITCAIRN:
1221                 radeon_program_register_sequence(rdev,
1222                                                  pitcairn_golden_registers,
1223                                                  (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1224                 radeon_program_register_sequence(rdev,
1225                                                  pitcairn_golden_rlc_registers,
1226                                                  (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1227                 radeon_program_register_sequence(rdev,
1228                                                  pitcairn_mgcg_cgcg_init,
1229                                                  (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1230                 break;
1231         case CHIP_VERDE:
1232                 radeon_program_register_sequence(rdev,
1233                                                  verde_golden_registers,
1234                                                  (const u32)ARRAY_SIZE(verde_golden_registers));
1235                 radeon_program_register_sequence(rdev,
1236                                                  verde_golden_rlc_registers,
1237                                                  (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1238                 radeon_program_register_sequence(rdev,
1239                                                  verde_mgcg_cgcg_init,
1240                                                  (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1241                 radeon_program_register_sequence(rdev,
1242                                                  verde_pg_init,
1243                                                  (const u32)ARRAY_SIZE(verde_pg_init));
1244                 break;
1245         case CHIP_OLAND:
1246                 radeon_program_register_sequence(rdev,
1247                                                  oland_golden_registers,
1248                                                  (const u32)ARRAY_SIZE(oland_golden_registers));
1249                 radeon_program_register_sequence(rdev,
1250                                                  oland_golden_rlc_registers,
1251                                                  (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1252                 radeon_program_register_sequence(rdev,
1253                                                  oland_mgcg_cgcg_init,
1254                                                  (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1255                 break;
1256         case CHIP_HAINAN:
1257                 radeon_program_register_sequence(rdev,
1258                                                  hainan_golden_registers,
1259                                                  (const u32)ARRAY_SIZE(hainan_golden_registers));
1260                 radeon_program_register_sequence(rdev,
1261                                                  hainan_golden_registers2,
1262                                                  (const u32)ARRAY_SIZE(hainan_golden_registers2));
1263                 radeon_program_register_sequence(rdev,
1264                                                  hainan_mgcg_cgcg_init,
1265                                                  (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1266                 break;
1267         default:
1268                 break;
1269         }
1270 }
1271
1272 /**
1273  * si_get_allowed_info_register - fetch the register for the info ioctl
1274  *
1275  * @rdev: radeon_device pointer
1276  * @reg: register offset in bytes
1277  * @val: register value
1278  *
1279  * Returns 0 for success or -EINVAL for an invalid register
1280  *
1281  */
1282 int si_get_allowed_info_register(struct radeon_device *rdev,
1283                                  u32 reg, u32 *val)
1284 {
1285         switch (reg) {
1286         case GRBM_STATUS:
1287         case GRBM_STATUS2:
1288         case GRBM_STATUS_SE0:
1289         case GRBM_STATUS_SE1:
1290         case SRBM_STATUS:
1291         case SRBM_STATUS2:
1292         case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1293         case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1294         case UVD_STATUS:
1295                 *val = RREG32(reg);
1296                 return 0;
1297         default:
1298                 return -EINVAL;
1299         }
1300 }
1301
1302 #define PCIE_BUS_CLK                10000
1303 #define TCLK                        (PCIE_BUS_CLK / 10)
1304
1305 /**
1306  * si_get_xclk - get the xclk
1307  *
1308  * @rdev: radeon_device pointer
1309  *
1310  * Returns the reference clock used by the gfx engine
1311  * (SI).
1312  */
1313 u32 si_get_xclk(struct radeon_device *rdev)
1314 {
1315         u32 reference_clock = rdev->clock.spll.reference_freq;
1316         u32 tmp;
1317
1318         tmp = RREG32(CG_CLKPIN_CNTL_2);
1319         if (tmp & MUX_TCLK_TO_XCLK)
1320                 return TCLK;
1321
1322         tmp = RREG32(CG_CLKPIN_CNTL);
1323         if (tmp & XTALIN_DIVIDE)
1324                 return reference_clock / 4;
1325
1326         return reference_clock;
1327 }
1328
1329 /* get temperature in millidegrees */
1330 int si_get_temp(struct radeon_device *rdev)
1331 {
1332         u32 temp;
1333         int actual_temp = 0;
1334
1335         temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1336                 CTF_TEMP_SHIFT;
1337
1338         if (temp & 0x200)
1339                 actual_temp = 255;
1340         else
1341                 actual_temp = temp & 0x1ff;
1342
1343         actual_temp = (actual_temp * 1000);
1344
1345         return actual_temp;
1346 }
1347
/*
 * Number of {index, data} rows in each of the *_io_mc_regs tables below;
 * the same size is used for every ASIC, not just Tahiti.
 */
1348 #define TAHITI_IO_MC_REGS_SIZE 36
1349
/*
 * MC IO register pairs for CHIP_TAHITI.
 *
 * Used by si_mc_load_microcode() when the legacy firmware layout is in use
 * (rdev->new_fw not set): for each row, the first value is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 */
1350 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1351         {0x0000006f, 0x03044000},
1352         {0x00000070, 0x0480c018},
1353         {0x00000071, 0x00000040},
1354         {0x00000072, 0x01000000},
1355         {0x00000074, 0x000000ff},
1356         {0x00000075, 0x00143400},
1357         {0x00000076, 0x08ec0800},
1358         {0x00000077, 0x040000cc},
1359         {0x00000079, 0x00000000},
1360         {0x0000007a, 0x21000409},
1361         {0x0000007c, 0x00000000},
1362         {0x0000007d, 0xe8000000},
1363         {0x0000007e, 0x044408a8},
1364         {0x0000007f, 0x00000003},
1365         {0x00000080, 0x00000000},
1366         {0x00000081, 0x01000000},
1367         {0x00000082, 0x02000000},
1368         {0x00000083, 0x00000000},
1369         {0x00000084, 0xe3f3e4f4},
1370         {0x00000085, 0x00052024},
1371         {0x00000087, 0x00000000},
1372         {0x00000088, 0x66036603},
1373         {0x00000089, 0x01000000},
1374         {0x0000008b, 0x1c0a0000},
1375         {0x0000008c, 0xff010000},
1376         {0x0000008e, 0xffffefff},
1377         {0x0000008f, 0xfff3efff},
1378         {0x00000090, 0xfff3efbf},
1379         {0x00000094, 0x00101101},
1380         {0x00000095, 0x00000fff},
1381         {0x00000096, 0x00116fff},
1382         {0x00000097, 0x60010000},
1383         {0x00000098, 0x10010000},
1384         {0x00000099, 0x00006000},
1385         {0x0000009a, 0x00001000},
1386         {0x0000009f, 0x00a77400}
1387 };
1388
/*
 * MC IO register pairs for CHIP_PITCAIRN.
 *
 * Used by si_mc_load_microcode() when the legacy firmware layout is in use
 * (rdev->new_fw not set): for each row, the first value is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other *_io_mc_regs tables only in the data
 * value of the final (index 0x9f) row.
 */
1389 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1390         {0x0000006f, 0x03044000},
1391         {0x00000070, 0x0480c018},
1392         {0x00000071, 0x00000040},
1393         {0x00000072, 0x01000000},
1394         {0x00000074, 0x000000ff},
1395         {0x00000075, 0x00143400},
1396         {0x00000076, 0x08ec0800},
1397         {0x00000077, 0x040000cc},
1398         {0x00000079, 0x00000000},
1399         {0x0000007a, 0x21000409},
1400         {0x0000007c, 0x00000000},
1401         {0x0000007d, 0xe8000000},
1402         {0x0000007e, 0x044408a8},
1403         {0x0000007f, 0x00000003},
1404         {0x00000080, 0x00000000},
1405         {0x00000081, 0x01000000},
1406         {0x00000082, 0x02000000},
1407         {0x00000083, 0x00000000},
1408         {0x00000084, 0xe3f3e4f4},
1409         {0x00000085, 0x00052024},
1410         {0x00000087, 0x00000000},
1411         {0x00000088, 0x66036603},
1412         {0x00000089, 0x01000000},
1413         {0x0000008b, 0x1c0a0000},
1414         {0x0000008c, 0xff010000},
1415         {0x0000008e, 0xffffefff},
1416         {0x0000008f, 0xfff3efff},
1417         {0x00000090, 0xfff3efbf},
1418         {0x00000094, 0x00101101},
1419         {0x00000095, 0x00000fff},
1420         {0x00000096, 0x00116fff},
1421         {0x00000097, 0x60010000},
1422         {0x00000098, 0x10010000},
1423         {0x00000099, 0x00006000},
1424         {0x0000009a, 0x00001000},
1425         {0x0000009f, 0x00a47400}
1426 };
1427
/*
 * MC IO register pairs for CHIP_VERDE; si_mc_load_microcode() also falls
 * back to this table for any family it has no explicit case for.
 *
 * Used when the legacy firmware layout is in use (rdev->new_fw not set):
 * for each row, the first value is written to MC_SEQ_IO_DEBUG_INDEX and
 * the second to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other *_io_mc_regs tables only in the data
 * value of the final (index 0x9f) row.
 */
1428 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1429         {0x0000006f, 0x03044000},
1430         {0x00000070, 0x0480c018},
1431         {0x00000071, 0x00000040},
1432         {0x00000072, 0x01000000},
1433         {0x00000074, 0x000000ff},
1434         {0x00000075, 0x00143400},
1435         {0x00000076, 0x08ec0800},
1436         {0x00000077, 0x040000cc},
1437         {0x00000079, 0x00000000},
1438         {0x0000007a, 0x21000409},
1439         {0x0000007c, 0x00000000},
1440         {0x0000007d, 0xe8000000},
1441         {0x0000007e, 0x044408a8},
1442         {0x0000007f, 0x00000003},
1443         {0x00000080, 0x00000000},
1444         {0x00000081, 0x01000000},
1445         {0x00000082, 0x02000000},
1446         {0x00000083, 0x00000000},
1447         {0x00000084, 0xe3f3e4f4},
1448         {0x00000085, 0x00052024},
1449         {0x00000087, 0x00000000},
1450         {0x00000088, 0x66036603},
1451         {0x00000089, 0x01000000},
1452         {0x0000008b, 0x1c0a0000},
1453         {0x0000008c, 0xff010000},
1454         {0x0000008e, 0xffffefff},
1455         {0x0000008f, 0xfff3efff},
1456         {0x00000090, 0xfff3efbf},
1457         {0x00000094, 0x00101101},
1458         {0x00000095, 0x00000fff},
1459         {0x00000096, 0x00116fff},
1460         {0x00000097, 0x60010000},
1461         {0x00000098, 0x10010000},
1462         {0x00000099, 0x00006000},
1463         {0x0000009a, 0x00001000},
1464         {0x0000009f, 0x00a37400}
1465 };
1466
/*
 * MC IO register pairs for CHIP_OLAND.
 *
 * Used by si_mc_load_microcode() when the legacy firmware layout is in use
 * (rdev->new_fw not set): for each row, the first value is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other *_io_mc_regs tables only in the data
 * value of the final (index 0x9f) row.
 */
1467 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1468         {0x0000006f, 0x03044000},
1469         {0x00000070, 0x0480c018},
1470         {0x00000071, 0x00000040},
1471         {0x00000072, 0x01000000},
1472         {0x00000074, 0x000000ff},
1473         {0x00000075, 0x00143400},
1474         {0x00000076, 0x08ec0800},
1475         {0x00000077, 0x040000cc},
1476         {0x00000079, 0x00000000},
1477         {0x0000007a, 0x21000409},
1478         {0x0000007c, 0x00000000},
1479         {0x0000007d, 0xe8000000},
1480         {0x0000007e, 0x044408a8},
1481         {0x0000007f, 0x00000003},
1482         {0x00000080, 0x00000000},
1483         {0x00000081, 0x01000000},
1484         {0x00000082, 0x02000000},
1485         {0x00000083, 0x00000000},
1486         {0x00000084, 0xe3f3e4f4},
1487         {0x00000085, 0x00052024},
1488         {0x00000087, 0x00000000},
1489         {0x00000088, 0x66036603},
1490         {0x00000089, 0x01000000},
1491         {0x0000008b, 0x1c0a0000},
1492         {0x0000008c, 0xff010000},
1493         {0x0000008e, 0xffffefff},
1494         {0x0000008f, 0xfff3efff},
1495         {0x00000090, 0xfff3efbf},
1496         {0x00000094, 0x00101101},
1497         {0x00000095, 0x00000fff},
1498         {0x00000096, 0x00116fff},
1499         {0x00000097, 0x60010000},
1500         {0x00000098, 0x10010000},
1501         {0x00000099, 0x00006000},
1502         {0x0000009a, 0x00001000},
1503         {0x0000009f, 0x00a17730}
1504 };
1505
/*
 * MC IO register pairs for CHIP_HAINAN.
 *
 * Used by si_mc_load_microcode() when the legacy firmware layout is in use
 * (rdev->new_fw not set): for each row, the first value is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 * Appears to differ from the other *_io_mc_regs tables only in the data
 * value of the final (index 0x9f) row.
 */
1506 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1507         {0x0000006f, 0x03044000},
1508         {0x00000070, 0x0480c018},
1509         {0x00000071, 0x00000040},
1510         {0x00000072, 0x01000000},
1511         {0x00000074, 0x000000ff},
1512         {0x00000075, 0x00143400},
1513         {0x00000076, 0x08ec0800},
1514         {0x00000077, 0x040000cc},
1515         {0x00000079, 0x00000000},
1516         {0x0000007a, 0x21000409},
1517         {0x0000007c, 0x00000000},
1518         {0x0000007d, 0xe8000000},
1519         {0x0000007e, 0x044408a8},
1520         {0x0000007f, 0x00000003},
1521         {0x00000080, 0x00000000},
1522         {0x00000081, 0x01000000},
1523         {0x00000082, 0x02000000},
1524         {0x00000083, 0x00000000},
1525         {0x00000084, 0xe3f3e4f4},
1526         {0x00000085, 0x00052024},
1527         {0x00000087, 0x00000000},
1528         {0x00000088, 0x66036603},
1529         {0x00000089, 0x01000000},
1530         {0x0000008b, 0x1c0a0000},
1531         {0x0000008c, 0xff010000},
1532         {0x0000008e, 0xffffefff},
1533         {0x0000008f, 0xfff3efff},
1534         {0x00000090, 0xfff3efbf},
1535         {0x00000094, 0x00101101},
1536         {0x00000095, 0x00000fff},
1537         {0x00000096, 0x00116fff},
1538         {0x00000097, 0x60010000},
1539         {0x00000098, 0x10010000},
1540         {0x00000099, 0x00006000},
1541         {0x0000009a, 0x00001000},
1542         {0x0000009f, 0x00a07730}
1543 };
1544
1545 /* ucode loading */
1546 int si_mc_load_microcode(struct radeon_device *rdev)
1547 {
1548         const __be32 *fw_data = NULL;
1549         const __le32 *new_fw_data = NULL;
1550         u32 running;
1551         u32 *io_mc_regs = NULL;
1552         const __le32 *new_io_mc_regs = NULL;
1553         int i, regs_size, ucode_size;
1554
1555         if (!rdev->mc_fw)
1556                 return -EINVAL;
1557
1558         if (rdev->new_fw) {
1559                 const struct mc_firmware_header_v1_0 *hdr =
1560                         (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1561
1562                 radeon_ucode_print_mc_hdr(&hdr->header);
1563                 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1564                 new_io_mc_regs = (const __le32 *)
1565                         (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1566                 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1567                 new_fw_data = (const __le32 *)
1568                         (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1569         } else {
1570                 ucode_size = rdev->mc_fw->size / 4;
1571
1572                 switch (rdev->family) {
1573                 case CHIP_TAHITI:
1574                         io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1575                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1576                         break;
1577                 case CHIP_PITCAIRN:
1578                         io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1579                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1580                         break;
1581                 case CHIP_VERDE:
1582                 default:
1583                         io_mc_regs = (u32 *)&verde_io_mc_regs;
1584                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1585                         break;
1586                 case CHIP_OLAND:
1587                         io_mc_regs = (u32 *)&oland_io_mc_regs;
1588                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1589                         break;
1590                 case CHIP_HAINAN:
1591                         io_mc_regs = (u32 *)&hainan_io_mc_regs;
1592                         regs_size = TAHITI_IO_MC_REGS_SIZE;
1593                         break;
1594                 }
1595                 fw_data = (const __be32 *)rdev->mc_fw->data;
1596         }
1597
1598         running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1599
1600         if (running == 0) {
1601                 /* reset the engine and set to writable */
1602                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1603                 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1604
1605                 /* load mc io regs */
1606                 for (i = 0; i < regs_size; i++) {
1607                         if (rdev->new_fw) {
1608                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1609                                 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1610                         } else {
1611                                 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1612                                 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1613                         }
1614                 }
1615                 /* load the MC ucode */
1616                 for (i = 0; i < ucode_size; i++) {
1617                         if (rdev->new_fw)
1618                                 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1619                         else
1620                                 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1621                 }
1622
1623                 /* put the engine back into the active state */
1624                 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1625                 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1626                 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1627
1628                 /* wait for training to complete */
1629                 for (i = 0; i < rdev->usec_timeout; i++) {
1630                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1631                                 break;
1632                         udelay(1);
1633                 }
1634                 for (i = 0; i < rdev->usec_timeout; i++) {
1635                         if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1636                                 break;
1637                         udelay(1);
1638                 }
1639         }
1640
1641         return 0;
1642 }
1643
1644 static int si_init_microcode(struct radeon_device *rdev)
1645 {
1646         const char *chip_name;
1647         const char *new_chip_name;
1648         size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1649         size_t smc_req_size, mc2_req_size;
1650         char fw_name[30];
1651         int err;
1652         int new_fw = 0;
1653         bool new_smc = false;
1654
1655         DRM_DEBUG("\n");
1656
1657         switch (rdev->family) {
1658         case CHIP_TAHITI:
1659                 chip_name = "TAHITI";
1660                 /* XXX: figure out which Tahitis need the new ucode */
1661                 if (0)
1662                         new_smc = true;
1663                 new_chip_name = "tahiti";
1664                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1665                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1666                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1667                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1668                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1669                 mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1670                 smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1671                 break;
1672         case CHIP_PITCAIRN:
1673                 chip_name = "PITCAIRN";
1674                 if ((rdev->pdev->revision == 0x81) ||
1675                     (rdev->pdev->device == 0x6810) ||
1676                     (rdev->pdev->device == 0x6811) ||
1677                     (rdev->pdev->device == 0x6816) ||
1678                     (rdev->pdev->device == 0x6817) ||
1679                     (rdev->pdev->device == 0x6806))
1680                         new_smc = true;
1681                 new_chip_name = "pitcairn";
1682                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1683                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1684                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1685                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1686                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1687                 mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1688                 smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1689                 break;
1690         case CHIP_VERDE:
1691                 chip_name = "VERDE";
1692                 if ((rdev->pdev->revision == 0x81) ||
1693                     (rdev->pdev->revision == 0x83) ||
1694                     (rdev->pdev->revision == 0x87) ||
1695                     (rdev->pdev->device == 0x6820) ||
1696                     (rdev->pdev->device == 0x6821) ||
1697                     (rdev->pdev->device == 0x6822) ||
1698                     (rdev->pdev->device == 0x6823) ||
1699                     (rdev->pdev->device == 0x682A) ||
1700                     (rdev->pdev->device == 0x682B))
1701                         new_smc = true;
1702                 new_chip_name = "verde";
1703                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1704                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1705                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1706                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1707                 mc_req_size = SI_MC_UCODE_SIZE * 4;
1708                 mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1709                 smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1710                 break;
1711         case CHIP_OLAND:
1712                 chip_name = "OLAND";
1713                 if ((rdev->pdev->revision == 0xC7) ||
1714                     (rdev->pdev->revision == 0x80) ||
1715                     (rdev->pdev->revision == 0x81) ||
1716                     (rdev->pdev->revision == 0x83) ||
1717                     (rdev->pdev->device == 0x6604) ||
1718                     (rdev->pdev->device == 0x6605))
1719                         new_smc = true;
1720                 new_chip_name = "oland";
1721                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1722                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1723                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1724                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1725                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1726                 smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1727                 break;
1728         case CHIP_HAINAN:
1729                 chip_name = "HAINAN";
1730                 if ((rdev->pdev->revision == 0x81) ||
1731                     (rdev->pdev->revision == 0x83) ||
1732                     (rdev->pdev->revision == 0xC3) ||
1733                     (rdev->pdev->device == 0x6664) ||
1734                     (rdev->pdev->device == 0x6665) ||
1735                     (rdev->pdev->device == 0x6667))
1736                         new_smc = true;
1737                 new_chip_name = "hainan";
1738                 pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1739                 me_req_size = SI_PM4_UCODE_SIZE * 4;
1740                 ce_req_size = SI_CE_UCODE_SIZE * 4;
1741                 rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1742                 mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1743                 smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1744                 break;
1745         default: BUG();
1746         }
1747
1748         DRM_INFO("Loading %s Microcode\n", new_chip_name);
1749
1750         snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1751         err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1752         if (err) {
1753                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1754                 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1755                 if (err)
1756                         goto out;
1757                 if (rdev->pfp_fw->size != pfp_req_size) {
1758                         printk(KERN_ERR
1759                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1760                                rdev->pfp_fw->size, fw_name);
1761                         err = -EINVAL;
1762                         goto out;
1763                 }
1764         } else {
1765                 err = radeon_ucode_validate(rdev->pfp_fw);
1766                 if (err) {
1767                         printk(KERN_ERR
1768                                "si_cp: validation failed for firmware \"%s\"\n",
1769                                fw_name);
1770                         goto out;
1771                 } else {
1772                         new_fw++;
1773                 }
1774         }
1775
1776         snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1777         err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1778         if (err) {
1779                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1780                 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1781                 if (err)
1782                         goto out;
1783                 if (rdev->me_fw->size != me_req_size) {
1784                         printk(KERN_ERR
1785                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1786                                rdev->me_fw->size, fw_name);
1787                         err = -EINVAL;
1788                 }
1789         } else {
1790                 err = radeon_ucode_validate(rdev->me_fw);
1791                 if (err) {
1792                         printk(KERN_ERR
1793                                "si_cp: validation failed for firmware \"%s\"\n",
1794                                fw_name);
1795                         goto out;
1796                 } else {
1797                         new_fw++;
1798                 }
1799         }
1800
1801         snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1802         err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1803         if (err) {
1804                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1805                 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806                 if (err)
1807                         goto out;
1808                 if (rdev->ce_fw->size != ce_req_size) {
1809                         printk(KERN_ERR
1810                                "si_cp: Bogus length %zu in firmware \"%s\"\n",
1811                                rdev->ce_fw->size, fw_name);
1812                         err = -EINVAL;
1813                 }
1814         } else {
1815                 err = radeon_ucode_validate(rdev->ce_fw);
1816                 if (err) {
1817                         printk(KERN_ERR
1818                                "si_cp: validation failed for firmware \"%s\"\n",
1819                                fw_name);
1820                         goto out;
1821                 } else {
1822                         new_fw++;
1823                 }
1824         }
1825
1826         snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1827         err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1828         if (err) {
1829                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1830                 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1831                 if (err)
1832                         goto out;
1833                 if (rdev->rlc_fw->size != rlc_req_size) {
1834                         printk(KERN_ERR
1835                                "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836                                rdev->rlc_fw->size, fw_name);
1837                         err = -EINVAL;
1838                 }
1839         } else {
1840                 err = radeon_ucode_validate(rdev->rlc_fw);
1841                 if (err) {
1842                         printk(KERN_ERR
1843                                "si_cp: validation failed for firmware \"%s\"\n",
1844                                fw_name);
1845                         goto out;
1846                 } else {
1847                         new_fw++;
1848                 }
1849         }
1850
1851         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1852         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1853         if (err) {
1854                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1855                 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1856                 if (err) {
1857                         snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1858                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1859                         if (err)
1860                                 goto out;
1861                 }
1862                 if ((rdev->mc_fw->size != mc_req_size) &&
1863                     (rdev->mc_fw->size != mc2_req_size)) {
1864                         printk(KERN_ERR
1865                                "si_mc: Bogus length %zu in firmware \"%s\"\n",
1866                                rdev->mc_fw->size, fw_name);
1867                         err = -EINVAL;
1868                 }
1869                 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1870         } else {
1871                 err = radeon_ucode_validate(rdev->mc_fw);
1872                 if (err) {
1873                         printk(KERN_ERR
1874                                "si_cp: validation failed for firmware \"%s\"\n",
1875                                fw_name);
1876                         goto out;
1877                 } else {
1878                         new_fw++;
1879                 }
1880         }
1881
1882         if (new_smc)
1883                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1884         else
1885                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1886         err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1887         if (err) {
1888                 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1889                 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1890                 if (err) {
1891                         printk(KERN_ERR
1892                                "smc: error loading firmware \"%s\"\n",
1893                                fw_name);
1894                         release_firmware(rdev->smc_fw);
1895                         rdev->smc_fw = NULL;
1896                         err = 0;
1897                 } else if (rdev->smc_fw->size != smc_req_size) {
1898                         printk(KERN_ERR
1899                                "si_smc: Bogus length %zu in firmware \"%s\"\n",
1900                                rdev->smc_fw->size, fw_name);
1901                         err = -EINVAL;
1902                 }
1903         } else {
1904                 err = radeon_ucode_validate(rdev->smc_fw);
1905                 if (err) {
1906                         printk(KERN_ERR
1907                                "si_cp: validation failed for firmware \"%s\"\n",
1908                                fw_name);
1909                         goto out;
1910                 } else {
1911                         new_fw++;
1912                 }
1913         }
1914
1915         if (new_fw == 0) {
1916                 rdev->new_fw = false;
1917         } else if (new_fw < 6) {
1918                 printk(KERN_ERR "si_fw: mixing new and old firmware!\n");
1919                 err = -EINVAL;
1920         } else {
1921                 rdev->new_fw = true;
1922         }
1923 out:
1924         if (err) {
1925                 if (err != -EINVAL)
1926                         printk(KERN_ERR
1927                                "si_cp: Failed to load firmware \"%s\"\n",
1928                                fw_name);
1929                 release_firmware(rdev->pfp_fw);
1930                 rdev->pfp_fw = NULL;
1931                 release_firmware(rdev->me_fw);
1932                 rdev->me_fw = NULL;
1933                 release_firmware(rdev->ce_fw);
1934                 rdev->ce_fw = NULL;
1935                 release_firmware(rdev->rlc_fw);
1936                 rdev->rlc_fw = NULL;
1937                 release_firmware(rdev->mc_fw);
1938                 rdev->mc_fw = NULL;
1939                 release_firmware(rdev->smc_fw);
1940                 rdev->smc_fw = NULL;
1941         }
1942         return err;
1943 }
1944
1945 /* watermark setup */
1946 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1947                                    struct radeon_crtc *radeon_crtc,
1948                                    struct drm_display_mode *mode,
1949                                    struct drm_display_mode *other_mode)
1950 {
1951         u32 tmp, buffer_alloc, i;
1952         u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1953         /*
1954          * Line Buffer Setup
1955          * There are 3 line buffers, each one shared by 2 display controllers.
1956          * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1957          * the display controllers.  The paritioning is done via one of four
1958          * preset allocations specified in bits 21:20:
1959          *  0 - half lb
1960          *  2 - whole lb, other crtc must be disabled
1961          */
1962         /* this can get tricky if we have two large displays on a paired group
1963          * of crtcs.  Ideally for multiple large displays we'd assign them to
1964          * non-linked crtcs for maximum line buffer allocation.
1965          */
1966         if (radeon_crtc->base.enabled && mode) {
1967                 if (other_mode) {
1968                         tmp = 0; /* 1/2 */
1969                         buffer_alloc = 1;
1970                 } else {
1971                         tmp = 2; /* whole */
1972                         buffer_alloc = 2;
1973                 }
1974         } else {
1975                 tmp = 0;
1976                 buffer_alloc = 0;
1977         }
1978
1979         WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1980                DC_LB_MEMORY_CONFIG(tmp));
1981
1982         WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1983                DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1984         for (i = 0; i < rdev->usec_timeout; i++) {
1985                 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1986                     DMIF_BUFFERS_ALLOCATED_COMPLETED)
1987                         break;
1988                 udelay(1);
1989         }
1990
1991         if (radeon_crtc->base.enabled && mode) {
1992                 switch (tmp) {
1993                 case 0:
1994                 default:
1995                         return 4096 * 2;
1996                 case 2:
1997                         return 8192 * 2;
1998                 }
1999         }
2000
2001         /* controller not enabled, so no lb used */
2002         return 0;
2003 }
2004
2005 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2006 {
2007         u32 tmp = RREG32(MC_SHARED_CHMAP);
2008
2009         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2010         case 0:
2011         default:
2012                 return 1;
2013         case 1:
2014                 return 2;
2015         case 2:
2016                 return 4;
2017         case 3:
2018                 return 8;
2019         case 4:
2020                 return 3;
2021         case 5:
2022                 return 6;
2023         case 6:
2024                 return 10;
2025         case 7:
2026                 return 12;
2027         case 8:
2028                 return 16;
2029         }
2030 }
2031
2032 struct dce6_wm_params {
2033         u32 dram_channels; /* number of dram channels */
2034         u32 yclk;          /* bandwidth per dram data pin in kHz */
2035         u32 sclk;          /* engine clock in kHz */
2036         u32 disp_clk;      /* display clock in kHz */
2037         u32 src_width;     /* viewport width */
2038         u32 active_time;   /* active display time in ns */
2039         u32 blank_time;    /* blank time in ns */
2040         bool interlaced;    /* mode is interlaced */
2041         fixed20_12 vsc;    /* vertical scale ratio */
2042         u32 num_heads;     /* number of active crtcs */
2043         u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2044         u32 lb_size;       /* line buffer allocated to pipe */
2045         u32 vtaps;         /* vertical scaler taps */
2046 };
2047
2048 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2049 {
2050         /* Calculate raw DRAM Bandwidth */
2051         fixed20_12 dram_efficiency; /* 0.7 */
2052         fixed20_12 yclk, dram_channels, bandwidth;
2053         fixed20_12 a;
2054
2055         a.full = dfixed_const(1000);
2056         yclk.full = dfixed_const(wm->yclk);
2057         yclk.full = dfixed_div(yclk, a);
2058         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2059         a.full = dfixed_const(10);
2060         dram_efficiency.full = dfixed_const(7);
2061         dram_efficiency.full = dfixed_div(dram_efficiency, a);
2062         bandwidth.full = dfixed_mul(dram_channels, yclk);
2063         bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2064
2065         return dfixed_trunc(bandwidth);
2066 }
2067
2068 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2069 {
2070         /* Calculate DRAM Bandwidth and the part allocated to display. */
2071         fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2072         fixed20_12 yclk, dram_channels, bandwidth;
2073         fixed20_12 a;
2074
2075         a.full = dfixed_const(1000);
2076         yclk.full = dfixed_const(wm->yclk);
2077         yclk.full = dfixed_div(yclk, a);
2078         dram_channels.full = dfixed_const(wm->dram_channels * 4);
2079         a.full = dfixed_const(10);
2080         disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2081         disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2082         bandwidth.full = dfixed_mul(dram_channels, yclk);
2083         bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2084
2085         return dfixed_trunc(bandwidth);
2086 }
2087
2088 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2089 {
2090         /* Calculate the display Data return Bandwidth */
2091         fixed20_12 return_efficiency; /* 0.8 */
2092         fixed20_12 sclk, bandwidth;
2093         fixed20_12 a;
2094
2095         a.full = dfixed_const(1000);
2096         sclk.full = dfixed_const(wm->sclk);
2097         sclk.full = dfixed_div(sclk, a);
2098         a.full = dfixed_const(10);
2099         return_efficiency.full = dfixed_const(8);
2100         return_efficiency.full = dfixed_div(return_efficiency, a);
2101         a.full = dfixed_const(32);
2102         bandwidth.full = dfixed_mul(a, sclk);
2103         bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2104
2105         return dfixed_trunc(bandwidth);
2106 }
2107
2108 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2109 {
2110         return 32;
2111 }
2112
2113 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2114 {
2115         /* Calculate the DMIF Request Bandwidth */
2116         fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2117         fixed20_12 disp_clk, sclk, bandwidth;
2118         fixed20_12 a, b1, b2;
2119         u32 min_bandwidth;
2120
2121         a.full = dfixed_const(1000);
2122         disp_clk.full = dfixed_const(wm->disp_clk);
2123         disp_clk.full = dfixed_div(disp_clk, a);
2124         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2125         b1.full = dfixed_mul(a, disp_clk);
2126
2127         a.full = dfixed_const(1000);
2128         sclk.full = dfixed_const(wm->sclk);
2129         sclk.full = dfixed_div(sclk, a);
2130         a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2131         b2.full = dfixed_mul(a, sclk);
2132
2133         a.full = dfixed_const(10);
2134         disp_clk_request_efficiency.full = dfixed_const(8);
2135         disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2136
2137         min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2138
2139         a.full = dfixed_const(min_bandwidth);
2140         bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2141
2142         return dfixed_trunc(bandwidth);
2143 }
2144
2145 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2146 {
2147         /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2148         u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2149         u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2150         u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2151
2152         return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2153 }
2154
2155 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2156 {
2157         /* Calculate the display mode Average Bandwidth
2158          * DisplayMode should contain the source and destination dimensions,
2159          * timing, etc.
2160          */
2161         fixed20_12 bpp;
2162         fixed20_12 line_time;
2163         fixed20_12 src_width;
2164         fixed20_12 bandwidth;
2165         fixed20_12 a;
2166
2167         a.full = dfixed_const(1000);
2168         line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2169         line_time.full = dfixed_div(line_time, a);
2170         bpp.full = dfixed_const(wm->bytes_per_pixel);
2171         src_width.full = dfixed_const(wm->src_width);
2172         bandwidth.full = dfixed_mul(src_width, bpp);
2173         bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2174         bandwidth.full = dfixed_div(bandwidth, line_time);
2175
2176         return dfixed_trunc(bandwidth);
2177 }
2178
2179 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2180 {
2181         /* First calcualte the latency in ns */
2182         u32 mc_latency = 2000; /* 2000 ns. */
2183         u32 available_bandwidth = dce6_available_bandwidth(wm);
2184         u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2185         u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2186         u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2187         u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2188                 (wm->num_heads * cursor_line_pair_return_time);
2189         u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2190         u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2191         u32 tmp, dmif_size = 12288;
2192         fixed20_12 a, b, c;
2193
2194         if (wm->num_heads == 0)
2195                 return 0;
2196
2197         a.full = dfixed_const(2);
2198         b.full = dfixed_const(1);
2199         if ((wm->vsc.full > a.full) ||
2200             ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2201             (wm->vtaps >= 5) ||
2202             ((wm->vsc.full >= a.full) && wm->interlaced))
2203                 max_src_lines_per_dst_line = 4;
2204         else
2205                 max_src_lines_per_dst_line = 2;
2206
2207         a.full = dfixed_const(available_bandwidth);
2208         b.full = dfixed_const(wm->num_heads);
2209         a.full = dfixed_div(a, b);
2210
2211         b.full = dfixed_const(mc_latency + 512);
2212         c.full = dfixed_const(wm->disp_clk);
2213         b.full = dfixed_div(b, c);
2214
2215         c.full = dfixed_const(dmif_size);
2216         b.full = dfixed_div(c, b);
2217
2218         tmp = min(dfixed_trunc(a), dfixed_trunc(b));
2219
2220         b.full = dfixed_const(1000);
2221         c.full = dfixed_const(wm->disp_clk);
2222         b.full = dfixed_div(c, b);
2223         c.full = dfixed_const(wm->bytes_per_pixel);
2224         b.full = dfixed_mul(b, c);
2225
2226         lb_fill_bw = min(tmp, dfixed_trunc(b));
2227
2228         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2229         b.full = dfixed_const(1000);
2230         c.full = dfixed_const(lb_fill_bw);
2231         b.full = dfixed_div(c, b);
2232         a.full = dfixed_div(a, b);
2233         line_fill_time = dfixed_trunc(a);
2234
2235         if (line_fill_time < wm->active_time)
2236                 return latency;
2237         else
2238                 return latency + (line_fill_time - wm->active_time);
2239
2240 }
2241
2242 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2243 {
2244         if (dce6_average_bandwidth(wm) <=
2245             (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2246                 return true;
2247         else
2248                 return false;
2249 };
2250
2251 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2252 {
2253         if (dce6_average_bandwidth(wm) <=
2254             (dce6_available_bandwidth(wm) / wm->num_heads))
2255                 return true;
2256         else
2257                 return false;
2258 };
2259
2260 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2261 {
2262         u32 lb_partitions = wm->lb_size / wm->src_width;
2263         u32 line_time = wm->active_time + wm->blank_time;
2264         u32 latency_tolerant_lines;
2265         u32 latency_hiding;
2266         fixed20_12 a;
2267
2268         a.full = dfixed_const(1);
2269         if (wm->vsc.full > a.full)
2270                 latency_tolerant_lines = 1;
2271         else {
2272                 if (lb_partitions <= (wm->vtaps + 1))
2273                         latency_tolerant_lines = 1;
2274                 else
2275                         latency_tolerant_lines = 2;
2276         }
2277
2278         latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2279
2280         if (dce6_latency_watermark(wm) <= latency_hiding)
2281                 return true;
2282         else
2283                 return false;
2284 }
2285
2286 static void dce6_program_watermarks(struct radeon_device *rdev,
2287                                          struct radeon_crtc *radeon_crtc,
2288                                          u32 lb_size, u32 num_heads)
2289 {
2290         struct drm_display_mode *mode = &radeon_crtc->base.mode;
2291         struct dce6_wm_params wm_low, wm_high;
2292         u32 dram_channels;
2293         u32 pixel_period;
2294         u32 line_time = 0;
2295         u32 latency_watermark_a = 0, latency_watermark_b = 0;
2296         u32 priority_a_mark = 0, priority_b_mark = 0;
2297         u32 priority_a_cnt = PRIORITY_OFF;
2298         u32 priority_b_cnt = PRIORITY_OFF;
2299         u32 tmp, arb_control3;
2300         fixed20_12 a, b, c;
2301
2302         if (radeon_crtc->base.enabled && num_heads && mode) {
2303                 pixel_period = 1000000 / (u32)mode->clock;
2304                 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
2305                 priority_a_cnt = 0;
2306                 priority_b_cnt = 0;
2307
2308                 if (rdev->family == CHIP_ARUBA)
2309                         dram_channels = evergreen_get_number_of_dram_channels(rdev);
2310                 else
2311                         dram_channels = si_get_number_of_dram_channels(rdev);
2312
2313                 /* watermark for high clocks */
2314                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2315                         wm_high.yclk =
2316                                 radeon_dpm_get_mclk(rdev, false) * 10;
2317                         wm_high.sclk =
2318                                 radeon_dpm_get_sclk(rdev, false) * 10;
2319                 } else {
2320                         wm_high.yclk = rdev->pm.current_mclk * 10;
2321                         wm_high.sclk = rdev->pm.current_sclk * 10;
2322                 }
2323
2324                 wm_high.disp_clk = mode->clock;
2325                 wm_high.src_width = mode->crtc_hdisplay;
2326                 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
2327                 wm_high.blank_time = line_time - wm_high.active_time;
2328                 wm_high.interlaced = false;
2329                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2330                         wm_high.interlaced = true;
2331                 wm_high.vsc = radeon_crtc->vsc;
2332                 wm_high.vtaps = 1;
2333                 if (radeon_crtc->rmx_type != RMX_OFF)
2334                         wm_high.vtaps = 2;
2335                 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2336                 wm_high.lb_size = lb_size;
2337                 wm_high.dram_channels = dram_channels;
2338                 wm_high.num_heads = num_heads;
2339
2340                 /* watermark for low clocks */
2341                 if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2342                         wm_low.yclk =
2343                                 radeon_dpm_get_mclk(rdev, true) * 10;
2344                         wm_low.sclk =
2345                                 radeon_dpm_get_sclk(rdev, true) * 10;
2346                 } else {
2347                         wm_low.yclk = rdev->pm.current_mclk * 10;
2348                         wm_low.sclk = rdev->pm.current_sclk * 10;
2349                 }
2350
2351                 wm_low.disp_clk = mode->clock;
2352                 wm_low.src_width = mode->crtc_hdisplay;
2353                 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
2354                 wm_low.blank_time = line_time - wm_low.active_time;
2355                 wm_low.interlaced = false;
2356                 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2357                         wm_low.interlaced = true;
2358                 wm_low.vsc = radeon_crtc->vsc;
2359                 wm_low.vtaps = 1;
2360                 if (radeon_crtc->rmx_type != RMX_OFF)
2361                         wm_low.vtaps = 2;
2362                 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2363                 wm_low.lb_size = lb_size;
2364                 wm_low.dram_channels = dram_channels;
2365                 wm_low.num_heads = num_heads;
2366
2367                 /* set for high clocks */
2368                 latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2369                 /* set for low clocks */
2370                 latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2371
2372                 /* possibly force display priority to high */
2373                 /* should really do this at mode validation time... */
2374                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2375                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2376                     !dce6_check_latency_hiding(&wm_high) ||
2377                     (rdev->disp_priority == 2)) {
2378                         DRM_DEBUG_KMS("force priority to high\n");
2379                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2380                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2381                 }
2382                 if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2383                     !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2384                     !dce6_check_latency_hiding(&wm_low) ||
2385                     (rdev->disp_priority == 2)) {
2386                         DRM_DEBUG_KMS("force priority to high\n");
2387                         priority_a_cnt |= PRIORITY_ALWAYS_ON;
2388                         priority_b_cnt |= PRIORITY_ALWAYS_ON;
2389                 }
2390
2391                 a.full = dfixed_const(1000);
2392                 b.full = dfixed_const(mode->clock);
2393                 b.full = dfixed_div(b, a);
2394                 c.full = dfixed_const(latency_watermark_a);
2395                 c.full = dfixed_mul(c, b);
2396                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2397                 c.full = dfixed_div(c, a);
2398                 a.full = dfixed_const(16);
2399                 c.full = dfixed_div(c, a);
2400                 priority_a_mark = dfixed_trunc(c);
2401                 priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2402
2403                 a.full = dfixed_const(1000);
2404                 b.full = dfixed_const(mode->clock);
2405                 b.full = dfixed_div(b, a);
2406                 c.full = dfixed_const(latency_watermark_b);
2407                 c.full = dfixed_mul(c, b);
2408                 c.full = dfixed_mul(c, radeon_crtc->hsc);
2409                 c.full = dfixed_div(c, a);
2410                 a.full = dfixed_const(16);
2411                 c.full = dfixed_div(c, a);
2412                 priority_b_mark = dfixed_trunc(c);
2413                 priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2414
2415                 /* Save number of lines the linebuffer leads before the scanout */
2416                 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2417         }
2418
2419         /* select wm A */
2420         arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2421         tmp = arb_control3;
2422         tmp &= ~LATENCY_WATERMARK_MASK(3);
2423         tmp |= LATENCY_WATERMARK_MASK(1);
2424         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2425         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2426                (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2427                 LATENCY_HIGH_WATERMARK(line_time)));
2428         /* select wm B */
2429         tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2430         tmp &= ~LATENCY_WATERMARK_MASK(3);
2431         tmp |= LATENCY_WATERMARK_MASK(2);
2432         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2433         WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2434                (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2435                 LATENCY_HIGH_WATERMARK(line_time)));
2436         /* restore original selection */
2437         WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2438
2439         /* write the priority marks */
2440         WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2441         WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2442
2443         /* save values for DPM */
2444         radeon_crtc->line_time = line_time;
2445         radeon_crtc->wm_high = latency_watermark_a;
2446         radeon_crtc->wm_low = latency_watermark_b;
2447 }
2448
2449 void dce6_bandwidth_update(struct radeon_device *rdev)
2450 {
2451         struct drm_display_mode *mode0 = NULL;
2452         struct drm_display_mode *mode1 = NULL;
2453         u32 num_heads = 0, lb_size;
2454         int i;
2455
2456         if (!rdev->mode_info.mode_config_initialized)
2457                 return;
2458
2459         radeon_update_display_priority(rdev);
2460
2461         for (i = 0; i < rdev->num_crtc; i++) {
2462                 if (rdev->mode_info.crtcs[i]->base.enabled)
2463                         num_heads++;
2464         }
2465         for (i = 0; i < rdev->num_crtc; i += 2) {
2466                 mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2467                 mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2468                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2469                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2470                 lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2471                 dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2472         }
2473 }
2474
2475 /*
2476  * Core functions
2477  */
2478 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2479 {
2480         u32 *tile = rdev->config.si.tile_mode_array;
2481         const u32 num_tile_mode_states =
2482                         ARRAY_SIZE(rdev->config.si.tile_mode_array);
2483         u32 reg_offset, split_equal_to_row_size;
2484
2485         switch (rdev->config.si.mem_row_size_in_kb) {
2486         case 1:
2487                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2488                 break;
2489         case 2:
2490         default:
2491                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2492                 break;
2493         case 4:
2494                 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2495                 break;
2496         }
2497
2498         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2499                 tile[reg_offset] = 0;
2500
2501         switch(rdev->family) {
2502         case CHIP_TAHITI:
2503         case CHIP_PITCAIRN:
2504                 /* non-AA compressed depth or any compressed stencil */
2505                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2508                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2509                            NUM_BANKS(ADDR_SURF_16_BANK) |
2510                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2513                 /* 2xAA/4xAA compressed depth only */
2514                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2517                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2518                            NUM_BANKS(ADDR_SURF_16_BANK) |
2519                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2521                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522                 /* 8xAA compressed depth only */
2523                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2525                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2527                            NUM_BANKS(ADDR_SURF_16_BANK) |
2528                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2532                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2535                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2536                            NUM_BANKS(ADDR_SURF_16_BANK) |
2537                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2540                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2541                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2544                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2545                            NUM_BANKS(ADDR_SURF_16_BANK) |
2546                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2550                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2552                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2553                            TILE_SPLIT(split_equal_to_row_size) |
2554                            NUM_BANKS(ADDR_SURF_16_BANK) |
2555                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2558                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2559                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2561                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2562                            TILE_SPLIT(split_equal_to_row_size) |
2563                            NUM_BANKS(ADDR_SURF_16_BANK) |
2564                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2567                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2568                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2569                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2570                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                            TILE_SPLIT(split_equal_to_row_size) |
2572                            NUM_BANKS(ADDR_SURF_16_BANK) |
2573                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2575                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576                 /* 1D and 1D Array Surfaces */
2577                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2578                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581                            NUM_BANKS(ADDR_SURF_16_BANK) |
2582                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2585                 /* Displayable maps. */
2586                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2587                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2589                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2590                            NUM_BANKS(ADDR_SURF_16_BANK) |
2591                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2593                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2594                 /* Display 8bpp. */
2595                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2598                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599                            NUM_BANKS(ADDR_SURF_16_BANK) |
2600                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2602                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603                 /* Display 16bpp. */
2604                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2606                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2607                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2608                            NUM_BANKS(ADDR_SURF_16_BANK) |
2609                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2611                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2612                 /* Display 32bpp. */
2613                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2614                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2617                            NUM_BANKS(ADDR_SURF_16_BANK) |
2618                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2620                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2621                 /* Thin. */
2622                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2623                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2624                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2626                            NUM_BANKS(ADDR_SURF_16_BANK) |
2627                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2629                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2630                 /* Thin 8 bpp. */
2631                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2634                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635                            NUM_BANKS(ADDR_SURF_16_BANK) |
2636                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2639                 /* Thin 16 bpp. */
2640                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2642                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2643                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2644                            NUM_BANKS(ADDR_SURF_16_BANK) |
2645                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2647                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2648                 /* Thin 32 bpp. */
2649                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2651                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2652                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2653                            NUM_BANKS(ADDR_SURF_16_BANK) |
2654                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2657                 /* Thin 64 bpp. */
2658                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661                            TILE_SPLIT(split_equal_to_row_size) |
2662                            NUM_BANKS(ADDR_SURF_16_BANK) |
2663                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2665                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2666                 /* 8 bpp PRT. */
2667                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2668                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2669                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2670                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2671                            NUM_BANKS(ADDR_SURF_16_BANK) |
2672                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2673                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2675                 /* 16 bpp PRT */
2676                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2678                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2679                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680                            NUM_BANKS(ADDR_SURF_16_BANK) |
2681                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2683                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2684                 /* 32 bpp PRT */
2685                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2689                            NUM_BANKS(ADDR_SURF_16_BANK) |
2690                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2692                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693                 /* 64 bpp PRT */
2694                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2698                            NUM_BANKS(ADDR_SURF_16_BANK) |
2699                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2702                 /* 128 bpp PRT */
2703                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2705                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2706                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2707                            NUM_BANKS(ADDR_SURF_8_BANK) |
2708                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2711
2712                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2713                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2714                 break;
2715
2716         case CHIP_VERDE:
2717         case CHIP_OLAND:
2718         case CHIP_HAINAN:
2719                 /* non-AA compressed depth or any compressed stencil */
2720                 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2722                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2724                            NUM_BANKS(ADDR_SURF_16_BANK) |
2725                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2727                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2728                 /* 2xAA/4xAA compressed depth only */
2729                 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2731                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2732                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2733                            NUM_BANKS(ADDR_SURF_16_BANK) |
2734                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2737                 /* 8xAA compressed depth only */
2738                 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2740                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2742                            NUM_BANKS(ADDR_SURF_16_BANK) |
2743                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2746                 /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2747                 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2748                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2749                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2751                            NUM_BANKS(ADDR_SURF_16_BANK) |
2752                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2754                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2755                 /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2756                 tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2757                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2760                            NUM_BANKS(ADDR_SURF_16_BANK) |
2761                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2763                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2764                 /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2765                 tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2767                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768                            TILE_SPLIT(split_equal_to_row_size) |
2769                            NUM_BANKS(ADDR_SURF_16_BANK) |
2770                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2772                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773                 /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2774                 tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2776                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2777                            TILE_SPLIT(split_equal_to_row_size) |
2778                            NUM_BANKS(ADDR_SURF_16_BANK) |
2779                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2781                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2782                 /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2783                 tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2784                            MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2785                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786                            TILE_SPLIT(split_equal_to_row_size) |
2787                            NUM_BANKS(ADDR_SURF_16_BANK) |
2788                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2790                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2791                 /* 1D and 1D Array Surfaces */
2792                 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2793                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796                            NUM_BANKS(ADDR_SURF_16_BANK) |
2797                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2799                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2800                 /* Displayable maps. */
2801                 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2802                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2805                            NUM_BANKS(ADDR_SURF_16_BANK) |
2806                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2808                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2809                 /* Display 8bpp. */
2810                 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2812                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2814                            NUM_BANKS(ADDR_SURF_16_BANK) |
2815                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2817                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2818                 /* Display 16bpp. */
2819                 tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2822                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2823                            NUM_BANKS(ADDR_SURF_16_BANK) |
2824                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2826                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2827                 /* Display 32bpp. */
2828                 tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2829                            MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2830                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2831                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2832                            NUM_BANKS(ADDR_SURF_16_BANK) |
2833                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2836                 /* Thin. */
2837                 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2838                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2839                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2840                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2841                            NUM_BANKS(ADDR_SURF_16_BANK) |
2842                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2845                 /* Thin 8 bpp. */
2846                 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2848                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2850                            NUM_BANKS(ADDR_SURF_16_BANK) |
2851                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2853                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2854                 /* Thin 16 bpp. */
2855                 tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2858                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2859                            NUM_BANKS(ADDR_SURF_16_BANK) |
2860                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2862                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863                 /* Thin 32 bpp. */
2864                 tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2866                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2867                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2868                            NUM_BANKS(ADDR_SURF_16_BANK) |
2869                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2872                 /* Thin 64 bpp. */
2873                 tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2874                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2875                            PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2876                            TILE_SPLIT(split_equal_to_row_size) |
2877                            NUM_BANKS(ADDR_SURF_16_BANK) |
2878                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2879                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2880                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2881                 /* 8 bpp PRT. */
2882                 tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2884                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2885                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2886                            NUM_BANKS(ADDR_SURF_16_BANK) |
2887                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2888                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2889                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2890                 /* 16 bpp PRT */
2891                 tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2893                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2894                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2895                            NUM_BANKS(ADDR_SURF_16_BANK) |
2896                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2898                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2899                 /* 32 bpp PRT */
2900                 tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2902                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2903                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2904                            NUM_BANKS(ADDR_SURF_16_BANK) |
2905                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2907                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2908                 /* 64 bpp PRT */
2909                 tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2911                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2912                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2913                            NUM_BANKS(ADDR_SURF_16_BANK) |
2914                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2917                 /* 128 bpp PRT */
2918                 tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2919                            MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2920                            PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2921                            TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2922                            NUM_BANKS(ADDR_SURF_8_BANK) |
2923                            BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2924                            BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2925                            MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2926
2927                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2928                         WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2929                 break;
2930
2931         default:
2932                 DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2933         }
2934 }
2935
2936 static void si_select_se_sh(struct radeon_device *rdev,
2937                             u32 se_num, u32 sh_num)
2938 {
2939         u32 data = INSTANCE_BROADCAST_WRITES;
2940
2941         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2942                 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2943         else if (se_num == 0xffffffff)
2944                 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2945         else if (sh_num == 0xffffffff)
2946                 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2947         else
2948                 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2949         WREG32(GRBM_GFX_INDEX, data);
2950 }
2951
/*
 * Build a mask with the low @bit_width bits set.
 *
 * A width of 0 yields 0; widths of 32 or more yield 0xffffffff because
 * the extra bits are simply shifted out of the 32-bit value.
 */
static u32 si_create_bitmask(u32 bit_width)
{
	u32 mask = 0;

	while (bit_width--)
		mask = (mask << 1) | 1;

	return mask;
}
2962
2963 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2964 {
2965         u32 data, mask;
2966
2967         data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2968         if (data & 1)
2969                 data &= INACTIVE_CUS_MASK;
2970         else
2971                 data = 0;
2972         data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2973
2974         data >>= INACTIVE_CUS_SHIFT;
2975
2976         mask = si_create_bitmask(cu_per_sh);
2977
2978         return ~data & mask;
2979 }
2980
2981 static void si_setup_spi(struct radeon_device *rdev,
2982                          u32 se_num, u32 sh_per_se,
2983                          u32 cu_per_sh)
2984 {
2985         int i, j, k;
2986         u32 data, mask, active_cu;
2987
2988         for (i = 0; i < se_num; i++) {
2989                 for (j = 0; j < sh_per_se; j++) {
2990                         si_select_se_sh(rdev, i, j);
2991                         data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2992                         active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2993
2994                         mask = 1;
2995                         for (k = 0; k < 16; k++) {
2996                                 mask <<= k;
2997                                 if (active_cu & mask) {
2998                                         data &= ~mask;
2999                                         WREG32(SPI_STATIC_THREAD_MGMT_3, data);
3000                                         break;
3001                                 }
3002                         }
3003                 }
3004         }
3005         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3006 }
3007
3008 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3009                               u32 max_rb_num_per_se,
3010                               u32 sh_per_se)
3011 {
3012         u32 data, mask;
3013
3014         data = RREG32(CC_RB_BACKEND_DISABLE);
3015         if (data & 1)
3016                 data &= BACKEND_DISABLE_MASK;
3017         else
3018                 data = 0;
3019         data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3020
3021         data >>= BACKEND_DISABLE_SHIFT;
3022
3023         mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3024
3025         return data & mask;
3026 }
3027
3028 static void si_setup_rb(struct radeon_device *rdev,
3029                         u32 se_num, u32 sh_per_se,
3030                         u32 max_rb_num_per_se)
3031 {
3032         int i, j;
3033         u32 data, mask;
3034         u32 disabled_rbs = 0;
3035         u32 enabled_rbs = 0;
3036
3037         for (i = 0; i < se_num; i++) {
3038                 for (j = 0; j < sh_per_se; j++) {
3039                         si_select_se_sh(rdev, i, j);
3040                         data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3041                         disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3042                 }
3043         }
3044         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3045
3046         mask = 1;
3047         for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3048                 if (!(disabled_rbs & mask))
3049                         enabled_rbs |= mask;
3050                 mask <<= 1;
3051         }
3052
3053         rdev->config.si.backend_enable_mask = enabled_rbs;
3054
3055         for (i = 0; i < se_num; i++) {
3056                 si_select_se_sh(rdev, i, 0xffffffff);
3057                 data = 0;
3058                 for (j = 0; j < sh_per_se; j++) {
3059                         switch (enabled_rbs & 3) {
3060                         case 1:
3061                                 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3062                                 break;
3063                         case 2:
3064                                 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3065                                 break;
3066                         case 3:
3067                         default:
3068                                 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3069                                 break;
3070                         }
3071                         enabled_rbs >>= 2;
3072                 }
3073                 WREG32(PA_SC_RASTER_CONFIG, data);
3074         }
3075         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3076 }
3077
/*
 * One-time graphics block initialisation for SI parts.
 *
 * Steps, in order:
 *  1. Fill rdev->config.si with per-family limits and pick the golden
 *     GB_ADDR_CONFIG value.
 *  2. Zero a range of HDP registers and set up GRBM/SRBM/BIF basics.
 *  3. Derive the memory row size from MC_ARB_RAMCFG, patch it into
 *     gb_addr_config and build the driver's custom tile_config dword.
 *  4. Write gb_addr_config to every block that carries a copy of it.
 *  5. Initialise the tiling mode table, the RB and SPI setup, and count
 *     the active CUs.
 *  6. Program a set of fixed defaults for the 3D engine.
 *
 * Unknown families fall through to the CHIP_VERDE settings.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/* Per-family limits, FIFO sizes and golden address config. */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		/* Pitcairn reuses the Tahiti golden value. */
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		/* Also the fallback for any family not listed here. */
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		/* Oland reuses the Verde golden value. */
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* 32 groups spaced 0x18 bytes apart; five dwords are zeroed in each. */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* NOTE(review): mc_shared_chmap is read but not used below. */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* Row size is derived from the NOOFCOLS field and capped at 4 KB. */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	/* Replace the golden ROW_SIZE field with the value computed above. */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	/* num_pipes field: encoded value n stands for 2^n pipes. */
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	/* num_banks field, taken from the NOOFBANK field of MC_ARB_RAMCFG. */
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	/* group_size and row_size are copied from gb_addr_config. */
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* The same address config value is written to each of these registers. */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* Count the set bits of every SE/SH active-CU bitmap. */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* Read back and rewrite SX_DEBUG_1 unchanged. */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* FIFO sizes chosen in the per-family switch above. */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* Clear all CB performance counter selects. */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* Read back and rewrite HDP_HOST_PATH_CNTL unchanged. */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}
3343
/*
 * GPU scratch register helper functions.
 */
3347 static void si_scratch_init(struct radeon_device *rdev)
3348 {
3349         int i;
3350
3351         rdev->scratch.num_reg = 7;
3352         rdev->scratch.reg_base = SCRATCH_REG0;
3353         for (i = 0; i < rdev->scratch.num_reg; i++) {
3354                 rdev->scratch.free[i] = true;
3355                 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3356         }
3357 }
3358
3359 void si_fence_ring_emit(struct radeon_device *rdev,
3360                         struct radeon_fence *fence)
3361 {
3362         struct radeon_ring *ring = &rdev->ring[fence->ring];
3363         u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3364
3365         /* flush read cache over gart */
3366         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3367         radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3368         radeon_ring_write(ring, 0);
3369         radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3370         radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3371                           PACKET3_TC_ACTION_ENA |
3372                           PACKET3_SH_KCACHE_ACTION_ENA |
3373                           PACKET3_SH_ICACHE_ACTION_ENA);
3374         radeon_ring_write(ring, 0xFFFFFFFF);
3375         radeon_ring_write(ring, 0);
3376         radeon_ring_write(ring, 10); /* poll interval */
3377         /* EVENT_WRITE_EOP - flush caches, send int */
3378         radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3379         radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3380         radeon_ring_write(ring, lower_32_bits(addr));
3381         radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3382         radeon_ring_write(ring, fence->seq);
3383         radeon_ring_write(ring, 0);
3384 }
3385
3386 /*
3387  * IB stuff
3388  */
3389 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3390 {
3391         struct radeon_ring *ring = &rdev->ring[ib->ring];
3392         unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3393         u32 header;
3394
3395         if (ib->is_const_ib) {
3396                 /* set switch buffer packet before const IB */
3397                 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3398                 radeon_ring_write(ring, 0);
3399
3400                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3401         } else {
3402                 u32 next_rptr;
3403                 if (ring->rptr_save_reg) {
3404                         next_rptr = ring->wptr + 3 + 4 + 8;
3405                         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3406                         radeon_ring_write(ring, ((ring->rptr_save_reg -
3407                                                   PACKET3_SET_CONFIG_REG_START) >> 2));
3408                         radeon_ring_write(ring, next_rptr);
3409                 } else if (rdev->wb.enabled) {
3410                         next_rptr = ring->wptr + 5 + 4 + 8;
3411                         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3412                         radeon_ring_write(ring, (1 << 8));
3413                         radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3414                         radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3415                         radeon_ring_write(ring, next_rptr);
3416                 }
3417
3418                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3419         }
3420
3421         radeon_ring_write(ring, header);
3422         radeon_ring_write(ring,
3423 #ifdef __BIG_ENDIAN
3424                           (2 << 0) |
3425 #endif
3426                           (ib->gpu_addr & 0xFFFFFFFC));
3427         radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3428         radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3429
3430         if (!ib->is_const_ib) {
3431                 /* flush read cache over gart for this vmid */
3432                 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3433                 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3434                 radeon_ring_write(ring, vm_id);
3435                 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3436                 radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3437                                   PACKET3_TC_ACTION_ENA |
3438                                   PACKET3_SH_KCACHE_ACTION_ENA |
3439                                   PACKET3_SH_ICACHE_ACTION_ENA);
3440                 radeon_ring_write(ring, 0xFFFFFFFF);
3441                 radeon_ring_write(ring, 0);
3442                 radeon_ring_write(ring, 10); /* poll interval */
3443         }
3444 }
3445
3446 /*
3447  * CP.
3448  */
3449 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3450 {
3451         if (enable)
3452                 WREG32(CP_ME_CNTL, 0);
3453         else {
3454                 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3455                         radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3456                 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3457                 WREG32(SCRATCH_UMSK, 0);
3458                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3459                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3460                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3461         }
3462         udelay(50);
3463 }
3464
3465 static int si_cp_load_microcode(struct radeon_device *rdev)
3466 {
3467         int i;
3468
3469         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3470                 return -EINVAL;
3471
3472         si_cp_enable(rdev, false);
3473
3474         if (rdev->new_fw) {
3475                 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3476                         (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3477                 const struct gfx_firmware_header_v1_0 *ce_hdr =
3478                         (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3479                 const struct gfx_firmware_header_v1_0 *me_hdr =
3480                         (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3481                 const __le32 *fw_data;
3482                 u32 fw_size;
3483
3484                 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3485                 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3486                 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3487
3488                 /* PFP */
3489                 fw_data = (const __le32 *)
3490                         (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3491                 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3492                 WREG32(CP_PFP_UCODE_ADDR, 0);
3493                 for (i = 0; i < fw_size; i++)
3494                         WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3495                 WREG32(CP_PFP_UCODE_ADDR, 0);
3496
3497                 /* CE */
3498                 fw_data = (const __le32 *)
3499                         (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3500                 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3501                 WREG32(CP_CE_UCODE_ADDR, 0);
3502                 for (i = 0; i < fw_size; i++)
3503                         WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3504                 WREG32(CP_CE_UCODE_ADDR, 0);
3505
3506                 /* ME */
3507                 fw_data = (const __be32 *)
3508                         (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3509                 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3510                 WREG32(CP_ME_RAM_WADDR, 0);
3511                 for (i = 0; i < fw_size; i++)
3512                         WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3513                 WREG32(CP_ME_RAM_WADDR, 0);
3514         } else {
3515                 const __be32 *fw_data;
3516
3517                 /* PFP */
3518                 fw_data = (const __be32 *)rdev->pfp_fw->data;
3519                 WREG32(CP_PFP_UCODE_ADDR, 0);
3520                 for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3521                         WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3522                 WREG32(CP_PFP_UCODE_ADDR, 0);
3523
3524                 /* CE */
3525                 fw_data = (const __be32 *)rdev->ce_fw->data;
3526                 WREG32(CP_CE_UCODE_ADDR, 0);
3527                 for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3528                         WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3529                 WREG32(CP_CE_UCODE_ADDR, 0);
3530
3531                 /* ME */
3532                 fw_data = (const __be32 *)rdev->me_fw->data;
3533                 WREG32(CP_ME_RAM_WADDR, 0);
3534                 for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3535                         WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3536                 WREG32(CP_ME_RAM_WADDR, 0);
3537         }
3538
3539         WREG32(CP_PFP_UCODE_ADDR, 0);
3540         WREG32(CP_CE_UCODE_ADDR, 0);
3541         WREG32(CP_ME_RAM_WADDR, 0);
3542         WREG32(CP_ME_RAM_RADDR, 0);
3543         return 0;
3544 }
3545
3546 static int si_cp_start(struct radeon_device *rdev)
3547 {
3548         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3549         int r, i;
3550
3551         r = radeon_ring_lock(rdev, ring, 7 + 4);
3552         if (r) {
3553                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3554                 return r;
3555         }
3556         /* init the CP */
3557         radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3558         radeon_ring_write(ring, 0x1);
3559         radeon_ring_write(ring, 0x0);
3560         radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3561         radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3562         radeon_ring_write(ring, 0);
3563         radeon_ring_write(ring, 0);
3564
3565         /* init the CE partitions */
3566         radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3567         radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3568         radeon_ring_write(ring, 0xc000);
3569         radeon_ring_write(ring, 0xe000);
3570         radeon_ring_unlock_commit(rdev, ring, false);
3571
3572         si_cp_enable(rdev, true);
3573
3574         r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3575         if (r) {
3576                 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3577                 return r;
3578         }
3579
3580         /* setup clear context state */
3581         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3582         radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3583
3584         for (i = 0; i < si_default_size; i++)
3585                 radeon_ring_write(ring, si_default_state[i]);
3586
3587         radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3588         radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3589
3590         /* set clear context state */
3591         radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3592         radeon_ring_write(ring, 0);
3593
3594         radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3595         radeon_ring_write(ring, 0x00000316);
3596         radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3597         radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3598
3599         radeon_ring_unlock_commit(rdev, ring, false);
3600
3601         for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3602                 ring = &rdev->ring[i];
3603                 r = radeon_ring_lock(rdev, ring, 2);
3604
3605                 /* clear the compute context state */
3606                 radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3607                 radeon_ring_write(ring, 0);
3608
3609                 radeon_ring_unlock_commit(rdev, ring, false);
3610         }
3611
3612         return 0;
3613 }
3614
3615 static void si_cp_fini(struct radeon_device *rdev)
3616 {
3617         struct radeon_ring *ring;
3618         si_cp_enable(rdev, false);
3619
3620         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3621         radeon_ring_fini(rdev, ring);
3622         radeon_scratch_free(rdev, ring->rptr_save_reg);
3623
3624         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3625         radeon_ring_fini(rdev, ring);
3626         radeon_scratch_free(rdev, ring->rptr_save_reg);
3627
3628         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3629         radeon_ring_fini(rdev, ring);
3630         radeon_scratch_free(rdev, ring->rptr_save_reg);
3631 }
3632
3633 static int si_cp_resume(struct radeon_device *rdev)
3634 {
3635         struct radeon_ring *ring;
3636         u32 tmp;
3637         u32 rb_bufsz;
3638         int r;
3639
3640         si_enable_gui_idle_interrupt(rdev, false);
3641
3642         WREG32(CP_SEM_WAIT_TIMER, 0x0);
3643         WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3644
3645         /* Set the write pointer delay */
3646         WREG32(CP_RB_WPTR_DELAY, 0);
3647
3648         WREG32(CP_DEBUG, 0);
3649         WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3650
3651         /* ring 0 - compute and gfx */
3652         /* Set ring buffer size */
3653         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3654         rb_bufsz = order_base_2(ring->ring_size / 8);
3655         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3656 #ifdef __BIG_ENDIAN
3657         tmp |= BUF_SWAP_32BIT;
3658 #endif
3659         WREG32(CP_RB0_CNTL, tmp);
3660
3661         /* Initialize the ring buffer's read and write pointers */
3662         WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3663         ring->wptr = 0;
3664         WREG32(CP_RB0_WPTR, ring->wptr);
3665
3666         /* set the wb address whether it's enabled or not */
3667         WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3668         WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3669
3670         if (rdev->wb.enabled)
3671                 WREG32(SCRATCH_UMSK, 0xff);
3672         else {
3673                 tmp |= RB_NO_UPDATE;
3674                 WREG32(SCRATCH_UMSK, 0);
3675         }
3676
3677         mdelay(1);
3678         WREG32(CP_RB0_CNTL, tmp);
3679
3680         WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3681
3682         /* ring1  - compute only */
3683         /* Set ring buffer size */
3684         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3685         rb_bufsz = order_base_2(ring->ring_size / 8);
3686         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3687 #ifdef __BIG_ENDIAN
3688         tmp |= BUF_SWAP_32BIT;
3689 #endif
3690         WREG32(CP_RB1_CNTL, tmp);
3691
3692         /* Initialize the ring buffer's read and write pointers */
3693         WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3694         ring->wptr = 0;
3695         WREG32(CP_RB1_WPTR, ring->wptr);
3696
3697         /* set the wb address whether it's enabled or not */
3698         WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3699         WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3700
3701         mdelay(1);
3702         WREG32(CP_RB1_CNTL, tmp);
3703
3704         WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3705
3706         /* ring2 - compute only */
3707         /* Set ring buffer size */
3708         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3709         rb_bufsz = order_base_2(ring->ring_size / 8);
3710         tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3711 #ifdef __BIG_ENDIAN
3712         tmp |= BUF_SWAP_32BIT;
3713 #endif
3714         WREG32(CP_RB2_CNTL, tmp);
3715
3716         /* Initialize the ring buffer's read and write pointers */
3717         WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3718         ring->wptr = 0;
3719         WREG32(CP_RB2_WPTR, ring->wptr);
3720
3721         /* set the wb address whether it's enabled or not */
3722         WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3723         WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3724
3725         mdelay(1);
3726         WREG32(CP_RB2_CNTL, tmp);
3727
3728         WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3729
3730         /* start the rings */
3731         si_cp_start(rdev);
3732         rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3733         rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3734         rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3735         r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3736         if (r) {
3737                 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3738                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3739                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3740                 return r;
3741         }
3742         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3743         if (r) {
3744                 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3745         }
3746         r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3747         if (r) {
3748                 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3749         }
3750
3751         si_enable_gui_idle_interrupt(rdev, true);
3752
3753         if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3754                 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3755
3756         return 0;
3757 }
3758
3759 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3760 {
3761         u32 reset_mask = 0;
3762         u32 tmp;
3763
3764         /* GRBM_STATUS */
3765         tmp = RREG32(GRBM_STATUS);
3766         if (tmp & (PA_BUSY | SC_BUSY |
3767                    BCI_BUSY | SX_BUSY |
3768                    TA_BUSY | VGT_BUSY |
3769                    DB_BUSY | CB_BUSY |
3770                    GDS_BUSY | SPI_BUSY |
3771                    IA_BUSY | IA_BUSY_NO_DMA))
3772                 reset_mask |= RADEON_RESET_GFX;
3773
3774         if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3775                    CP_BUSY | CP_COHERENCY_BUSY))
3776                 reset_mask |= RADEON_RESET_CP;
3777
3778         if (tmp & GRBM_EE_BUSY)
3779                 reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3780
3781         /* GRBM_STATUS2 */
3782         tmp = RREG32(GRBM_STATUS2);
3783         if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3784                 reset_mask |= RADEON_RESET_RLC;
3785
3786         /* DMA_STATUS_REG 0 */
3787         tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3788         if (!(tmp & DMA_IDLE))
3789                 reset_mask |= RADEON_RESET_DMA;
3790
3791         /* DMA_STATUS_REG 1 */
3792         tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3793         if (!(tmp & DMA_IDLE))
3794                 reset_mask |= RADEON_RESET_DMA1;
3795
3796         /* SRBM_STATUS2 */
3797         tmp = RREG32(SRBM_STATUS2);
3798         if (tmp & DMA_BUSY)
3799                 reset_mask |= RADEON_RESET_DMA;
3800
3801         if (tmp & DMA1_BUSY)
3802                 reset_mask |= RADEON_RESET_DMA1;
3803
3804         /* SRBM_STATUS */
3805         tmp = RREG32(SRBM_STATUS);
3806
3807         if (tmp & IH_BUSY)
3808                 reset_mask |= RADEON_RESET_IH;
3809
3810         if (tmp & SEM_BUSY)
3811                 reset_mask |= RADEON_RESET_SEM;
3812
3813         if (tmp & GRBM_RQ_PENDING)
3814                 reset_mask |= RADEON_RESET_GRBM;
3815
3816         if (tmp & VMC_BUSY)
3817                 reset_mask |= RADEON_RESET_VMC;
3818
3819         if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3820                    MCC_BUSY | MCD_BUSY))
3821                 reset_mask |= RADEON_RESET_MC;
3822
3823         if (evergreen_is_display_hung(rdev))
3824                 reset_mask |= RADEON_RESET_DISPLAY;
3825
3826         /* VM_L2_STATUS */
3827         tmp = RREG32(VM_L2_STATUS);
3828         if (tmp & L2_BUSY)
3829                 reset_mask |= RADEON_RESET_VMC;
3830
3831         /* Skip MC reset as it's mostly likely not hung, just busy */
3832         if (reset_mask & RADEON_RESET_MC) {
3833                 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3834                 reset_mask &= ~RADEON_RESET_MC;
3835         }
3836
3837         return reset_mask;
3838 }
3839
3840 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3841 {
3842         struct evergreen_mc_save save;
3843         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3844         u32 tmp;
3845
3846         if (reset_mask == 0)
3847                 return;
3848
3849         dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3850
3851         evergreen_print_gpu_status_regs(rdev);
3852         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3853                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3854         dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3855                  RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3856
3857         /* disable PG/CG */
3858         si_fini_pg(rdev);
3859         si_fini_cg(rdev);
3860
3861         /* stop the rlc */
3862         si_rlc_stop(rdev);
3863
3864         /* Disable CP parsing/prefetching */
3865         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3866
3867         if (reset_mask & RADEON_RESET_DMA) {
3868                 /* dma0 */
3869                 tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3870                 tmp &= ~DMA_RB_ENABLE;
3871                 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3872         }
3873         if (reset_mask & RADEON_RESET_DMA1) {
3874                 /* dma1 */
3875                 tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3876                 tmp &= ~DMA_RB_ENABLE;
3877                 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3878         }
3879
3880         udelay(50);
3881
3882         evergreen_mc_stop(rdev, &save);
3883         if (evergreen_mc_wait_for_idle(rdev)) {
3884                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3885         }
3886
3887         if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3888                 grbm_soft_reset = SOFT_RESET_CB |
3889                         SOFT_RESET_DB |
3890                         SOFT_RESET_GDS |
3891                         SOFT_RESET_PA |
3892                         SOFT_RESET_SC |
3893                         SOFT_RESET_BCI |
3894                         SOFT_RESET_SPI |
3895                         SOFT_RESET_SX |
3896                         SOFT_RESET_TC |
3897                         SOFT_RESET_TA |
3898                         SOFT_RESET_VGT |
3899                         SOFT_RESET_IA;
3900         }
3901
3902         if (reset_mask & RADEON_RESET_CP) {
3903                 grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3904
3905                 srbm_soft_reset |= SOFT_RESET_GRBM;
3906         }
3907
3908         if (reset_mask & RADEON_RESET_DMA)
3909                 srbm_soft_reset |= SOFT_RESET_DMA;
3910
3911         if (reset_mask & RADEON_RESET_DMA1)
3912                 srbm_soft_reset |= SOFT_RESET_DMA1;
3913
3914         if (reset_mask & RADEON_RESET_DISPLAY)
3915                 srbm_soft_reset |= SOFT_RESET_DC;
3916
3917         if (reset_mask & RADEON_RESET_RLC)
3918                 grbm_soft_reset |= SOFT_RESET_RLC;
3919
3920         if (reset_mask & RADEON_RESET_SEM)
3921                 srbm_soft_reset |= SOFT_RESET_SEM;
3922
3923         if (reset_mask & RADEON_RESET_IH)
3924                 srbm_soft_reset |= SOFT_RESET_IH;
3925
3926         if (reset_mask & RADEON_RESET_GRBM)
3927                 srbm_soft_reset |= SOFT_RESET_GRBM;
3928
3929         if (reset_mask & RADEON_RESET_VMC)
3930                 srbm_soft_reset |= SOFT_RESET_VMC;
3931
3932         if (reset_mask & RADEON_RESET_MC)
3933                 srbm_soft_reset |= SOFT_RESET_MC;
3934
3935         if (grbm_soft_reset) {
3936                 tmp = RREG32(GRBM_SOFT_RESET);
3937                 tmp |= grbm_soft_reset;
3938                 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3939                 WREG32(GRBM_SOFT_RESET, tmp);
3940                 tmp = RREG32(GRBM_SOFT_RESET);
3941
3942                 udelay(50);
3943
3944                 tmp &= ~grbm_soft_reset;
3945                 WREG32(GRBM_SOFT_RESET, tmp);
3946                 tmp = RREG32(GRBM_SOFT_RESET);
3947         }
3948
3949         if (srbm_soft_reset) {
3950                 tmp = RREG32(SRBM_SOFT_RESET);
3951                 tmp |= srbm_soft_reset;
3952                 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3953                 WREG32(SRBM_SOFT_RESET, tmp);
3954                 tmp = RREG32(SRBM_SOFT_RESET);
3955
3956                 udelay(50);
3957
3958                 tmp &= ~srbm_soft_reset;
3959                 WREG32(SRBM_SOFT_RESET, tmp);
3960                 tmp = RREG32(SRBM_SOFT_RESET);
3961         }
3962
3963         /* Wait a little for things to settle down */
3964         udelay(50);
3965
3966         evergreen_mc_resume(rdev, &save);
3967         udelay(50);
3968
3969         evergreen_print_gpu_status_regs(rdev);
3970 }
3971
3972 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3973 {
3974         u32 tmp, i;
3975
3976         tmp = RREG32(CG_SPLL_FUNC_CNTL);
3977         tmp |= SPLL_BYPASS_EN;
3978         WREG32(CG_SPLL_FUNC_CNTL, tmp);
3979
3980         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3981         tmp |= SPLL_CTLREQ_CHG;
3982         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3983
3984         for (i = 0; i < rdev->usec_timeout; i++) {
3985                 if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3986                         break;
3987                 udelay(1);
3988         }
3989
3990         tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3991         tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3992         WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3993
3994         tmp = RREG32(MPLL_CNTL_MODE);
3995         tmp &= ~MPLL_MCLK_SEL;
3996         WREG32(MPLL_CNTL_MODE, tmp);
3997 }
3998
3999 static void si_spll_powerdown(struct radeon_device *rdev)
4000 {
4001         u32 tmp;
4002
4003         tmp = RREG32(SPLL_CNTL_MODE);
4004         tmp |= SPLL_SW_DIR_CONTROL;
4005         WREG32(SPLL_CNTL_MODE, tmp);
4006
4007         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4008         tmp |= SPLL_RESET;
4009         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4010
4011         tmp = RREG32(CG_SPLL_FUNC_CNTL);
4012         tmp |= SPLL_SLEEP;
4013         WREG32(CG_SPLL_FUNC_CNTL, tmp);
4014
4015         tmp = RREG32(SPLL_CNTL_MODE);
4016         tmp &= ~SPLL_SW_DIR_CONTROL;
4017         WREG32(SPLL_CNTL_MODE, tmp);
4018 }
4019
4020 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4021 {
4022         struct evergreen_mc_save save;
4023         u32 tmp, i;
4024
4025         dev_info(rdev->dev, "GPU pci config reset\n");
4026
4027         /* disable dpm? */
4028
4029         /* disable cg/pg */
4030         si_fini_pg(rdev);
4031         si_fini_cg(rdev);
4032
4033         /* Disable CP parsing/prefetching */
4034         WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4035         /* dma0 */
4036         tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4037         tmp &= ~DMA_RB_ENABLE;
4038         WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4039         /* dma1 */
4040         tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4041         tmp &= ~DMA_RB_ENABLE;
4042         WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4043         /* XXX other engines? */
4044
4045         /* halt the rlc, disable cp internal ints */
4046         si_rlc_stop(rdev);
4047
4048         udelay(50);
4049
4050         /* disable mem access */
4051         evergreen_mc_stop(rdev, &save);
4052         if (evergreen_mc_wait_for_idle(rdev)) {
4053                 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4054         }
4055
4056         /* set mclk/sclk to bypass */
4057         si_set_clk_bypass_mode(rdev);
4058         /* powerdown spll */
4059         si_spll_powerdown(rdev);
4060         /* disable BM */
4061         pci_clear_master(rdev->pdev);
4062         /* reset */
4063         radeon_pci_config_reset(rdev);
4064         /* wait for asic to come out of reset */
4065         for (i = 0; i < rdev->usec_timeout; i++) {
4066                 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4067                         break;
4068                 udelay(1);
4069         }
4070 }
4071
4072 int si_asic_reset(struct radeon_device *rdev, bool hard)
4073 {
4074         u32 reset_mask;
4075
4076         if (hard) {
4077                 si_gpu_pci_config_reset(rdev);
4078                 return 0;
4079         }
4080
4081         reset_mask = si_gpu_check_soft_reset(rdev);
4082
4083         if (reset_mask)
4084                 r600_set_bios_scratch_engine_hung(rdev, true);
4085
4086         /* try soft reset */
4087         si_gpu_soft_reset(rdev, reset_mask);
4088
4089         reset_mask = si_gpu_check_soft_reset(rdev);
4090
4091         /* try pci config reset */
4092         if (reset_mask && radeon_hard_reset)
4093                 si_gpu_pci_config_reset(rdev);
4094
4095         reset_mask = si_gpu_check_soft_reset(rdev);
4096
4097         if (!reset_mask)
4098                 r600_set_bios_scratch_engine_hung(rdev, false);
4099
4100         return 0;
4101 }
4102
4103 /**
4104  * si_gfx_is_lockup - Check if the GFX engine is locked up
4105  *
4106  * @rdev: radeon_device pointer
4107  * @ring: radeon_ring structure holding ring information
4108  *
4109  * Check if the GFX engine is locked up.
4110  * Returns true if the engine appears to be locked up, false if not.
4111  */
4112 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4113 {
4114         u32 reset_mask = si_gpu_check_soft_reset(rdev);
4115
4116         if (!(reset_mask & (RADEON_RESET_GFX |
4117                             RADEON_RESET_COMPUTE |
4118                             RADEON_RESET_CP))) {
4119                 radeon_ring_lockup_update(rdev, ring);
4120                 return false;
4121         }
4122         return radeon_ring_test_lockup(rdev, ring);
4123 }
4124
4125 /* MC */
4126 static void si_mc_program(struct radeon_device *rdev)
4127 {
4128         struct evergreen_mc_save save;
4129         u32 tmp;
4130         int i, j;
4131
4132         /* Initialize HDP */
4133         for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4134                 WREG32((0x2c14 + j), 0x00000000);
4135                 WREG32((0x2c18 + j), 0x00000000);
4136                 WREG32((0x2c1c + j), 0x00000000);
4137                 WREG32((0x2c20 + j), 0x00000000);
4138                 WREG32((0x2c24 + j), 0x00000000);
4139         }
4140         WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4141
4142         evergreen_mc_stop(rdev, &save);
4143         if (radeon_mc_wait_for_idle(rdev)) {
4144                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4145         }
4146         if (!ASIC_IS_NODCE(rdev))
4147                 /* Lockout access through VGA aperture*/
4148                 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4149         /* Update configuration */
4150         WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4151                rdev->mc.vram_start >> 12);
4152         WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4153                rdev->mc.vram_end >> 12);
4154         WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4155                rdev->vram_scratch.gpu_addr >> 12);
4156         tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4157         tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4158         WREG32(MC_VM_FB_LOCATION, tmp);
4159         /* XXX double check these! */
4160         WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4161         WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4162         WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4163         WREG32(MC_VM_AGP_BASE, 0);
4164         WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4165         WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4166         if (radeon_mc_wait_for_idle(rdev)) {
4167                 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4168         }
4169         evergreen_mc_resume(rdev, &save);
4170         if (!ASIC_IS_NODCE(rdev)) {
4171                 /* we need to own VRAM, so turn off the VGA renderer here
4172                  * to stop it overwriting our objects */
4173                 rv515_vga_render_disable(rdev);
4174         }
4175 }
4176
4177 void si_vram_gtt_location(struct radeon_device *rdev,
4178                           struct radeon_mc *mc)
4179 {
4180         if (mc->mc_vram_size > 0xFFC0000000ULL) {
4181                 /* leave room for at least 1024M GTT */
4182                 dev_warn(rdev->dev, "limiting VRAM\n");
4183                 mc->real_vram_size = 0xFFC0000000ULL;
4184                 mc->mc_vram_size = 0xFFC0000000ULL;
4185         }
4186         radeon_vram_location(rdev, &rdev->mc, 0);
4187         rdev->mc.gtt_base_align = 0;
4188         radeon_gtt_location(rdev, mc);
4189 }
4190
4191 static int si_mc_init(struct radeon_device *rdev)
4192 {
4193         u32 tmp;
4194         int chansize, numchan;
4195
4196         /* Get VRAM informations */
4197         rdev->mc.vram_is_ddr = true;
4198         tmp = RREG32(MC_ARB_RAMCFG);
4199         if (tmp & CHANSIZE_OVERRIDE) {
4200                 chansize = 16;
4201         } else if (tmp & CHANSIZE_MASK) {
4202                 chansize = 64;
4203         } else {
4204                 chansize = 32;
4205         }
4206         tmp = RREG32(MC_SHARED_CHMAP);
4207         switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4208         case 0:
4209         default:
4210                 numchan = 1;
4211                 break;
4212         case 1:
4213                 numchan = 2;
4214                 break;
4215         case 2:
4216                 numchan = 4;
4217                 break;
4218         case 3:
4219                 numchan = 8;
4220                 break;
4221         case 4:
4222                 numchan = 3;
4223                 break;
4224         case 5:
4225                 numchan = 6;
4226                 break;
4227         case 6:
4228                 numchan = 10;
4229                 break;
4230         case 7:
4231                 numchan = 12;
4232                 break;
4233         case 8:
4234                 numchan = 16;
4235                 break;
4236         }
4237         rdev->mc.vram_width = numchan * chansize;
4238         /* Could aper size report 0 ? */
4239         rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4240         rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4241         /* size in MB on si */
4242         tmp = RREG32(CONFIG_MEMSIZE);
4243         /* some boards may have garbage in the upper 16 bits */
4244         if (tmp & 0xffff0000) {
4245                 DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4246                 if (tmp & 0xffff)
4247                         tmp &= 0xffff;
4248         }
4249         rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4250         rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4251         rdev->mc.visible_vram_size = rdev->mc.aper_size;
4252         si_vram_gtt_location(rdev, &rdev->mc);
4253         radeon_update_bandwidth_info(rdev);
4254
4255         return 0;
4256 }
4257
4258 /*
4259  * GART
4260  */
4261 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4262 {
4263         /* flush hdp cache */
4264         WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4265
4266         /* bits 0-15 are the VM contexts0-15 */
4267         WREG32(VM_INVALIDATE_REQUEST, 1);
4268 }
4269
4270 static int si_pcie_gart_enable(struct radeon_device *rdev)
4271 {
4272         int r, i;
4273
4274         if (rdev->gart.robj == NULL) {
4275                 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4276                 return -EINVAL;
4277         }
4278         r = radeon_gart_table_vram_pin(rdev);
4279         if (r)
4280                 return r;
4281         /* Setup TLB control */
4282         WREG32(MC_VM_MX_L1_TLB_CNTL,
4283                (0xA << 7) |
4284                ENABLE_L1_TLB |
4285                ENABLE_L1_FRAGMENT_PROCESSING |
4286                SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4287                ENABLE_ADVANCED_DRIVER_MODEL |
4288                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4289         /* Setup L2 cache */
4290         WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4291                ENABLE_L2_FRAGMENT_PROCESSING |
4292                ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4293                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4294                EFFECTIVE_L2_QUEUE_SIZE(7) |
4295                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4296         WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4297         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4298                BANK_SELECT(4) |
4299                L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4300         /* setup context0 */
4301         WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4302         WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4303         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4304         WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4305                         (u32)(rdev->dummy_page.addr >> 12));
4306         WREG32(VM_CONTEXT0_CNTL2, 0);
4307         WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4308                                   RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4309
4310         WREG32(0x15D4, 0);
4311         WREG32(0x15D8, 0);
4312         WREG32(0x15DC, 0);
4313
4314         /* empty context1-15 */
4315         /* set vm size, must be a multiple of 4 */
4316         WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4317         WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4318         /* Assign the pt base to something valid for now; the pts used for
4319          * the VMs are determined by the application and setup and assigned
4320          * on the fly in the vm part of radeon_gart.c
4321          */
4322         for (i = 1; i < 16; i++) {
4323                 if (i < 8)
4324                         WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4325                                rdev->vm_manager.saved_table_addr[i]);
4326                 else
4327                         WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4328                                rdev->vm_manager.saved_table_addr[i]);
4329         }
4330
4331         /* enable context1-15 */
4332         WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4333                (u32)(rdev->dummy_page.addr >> 12));
4334         WREG32(VM_CONTEXT1_CNTL2, 4);
4335         WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4336                                 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4337                                 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4338                                 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4339                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4340                                 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4341                                 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4342                                 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4343                                 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4344                                 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4345                                 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4346                                 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4347                                 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4348                                 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4349
4350         si_pcie_gart_tlb_flush(rdev);
4351         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352                  (unsigned)(rdev->mc.gtt_size >> 20),
4353                  (unsigned long long)rdev->gart.table_addr);
4354         rdev->gart.ready = true;
4355         return 0;
4356 }
4357
4358 static void si_pcie_gart_disable(struct radeon_device *rdev)
4359 {
4360         unsigned i;
4361
4362         for (i = 1; i < 16; ++i) {
4363                 uint32_t reg;
4364                 if (i < 8)
4365                         reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4366                 else
4367                         reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4368                 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4369         }
4370
4371         /* Disable all tables */
4372         WREG32(VM_CONTEXT0_CNTL, 0);
4373         WREG32(VM_CONTEXT1_CNTL, 0);
4374         /* Setup TLB control */
4375         WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4376                SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4377         /* Setup L2 cache */
4378         WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4379                ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4380                EFFECTIVE_L2_QUEUE_SIZE(7) |
4381                CONTEXT1_IDENTITY_ACCESS_MODE(1));
4382         WREG32(VM_L2_CNTL2, 0);
4383         WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4384                L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4385         radeon_gart_table_vram_unpin(rdev);
4386 }
4387
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART first, then frees the VRAM-backed GART table and
 * finally calls radeon_gart_fini().
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* the hardware must stop using the table before it is released */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4394
4395 /* vm parser */
4396 static bool si_vm_reg_valid(u32 reg)
4397 {
4398         /* context regs are fine */
4399         if (reg >= 0x28000)
4400                 return true;
4401
4402         /* shader regs are also fine */
4403         if (reg >= 0xB000 && reg < 0xC000)
4404                 return true;
4405
4406         /* check config regs */
4407         switch (reg) {
4408         case GRBM_GFX_INDEX:
4409         case CP_STRMOUT_CNTL:
4410         case VGT_VTX_VECT_EJECT_REG:
4411         case VGT_CACHE_INVALIDATION:
4412         case VGT_ESGS_RING_SIZE:
4413         case VGT_GSVS_RING_SIZE:
4414         case VGT_GS_VERTEX_REUSE:
4415         case VGT_PRIMITIVE_TYPE:
4416         case VGT_INDEX_TYPE:
4417         case VGT_NUM_INDICES:
4418         case VGT_NUM_INSTANCES:
4419         case VGT_TF_RING_SIZE:
4420         case VGT_HS_OFFCHIP_PARAM:
4421         case VGT_TF_MEMORY_BASE:
4422         case PA_CL_ENHANCE:
4423         case PA_SU_LINE_STIPPLE_VALUE:
4424         case PA_SC_LINE_STIPPLE_STATE:
4425         case PA_SC_ENHANCE:
4426         case SQC_CACHES:
4427         case SPI_STATIC_THREAD_MGMT_1:
4428         case SPI_STATIC_THREAD_MGMT_2:
4429         case SPI_STATIC_THREAD_MGMT_3:
4430         case SPI_PS_MAX_WAVE_ID:
4431         case SPI_CONFIG_CNTL:
4432         case SPI_CONFIG_CNTL_1:
4433         case TA_CNTL_AUX:
4434         case TA_CS_BC_BASE_ADDR:
4435                 return true;
4436         default:
4437                 DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4438                 return false;
4439         }
4440 }
4441
4442 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4443                                   u32 *ib, struct radeon_cs_packet *pkt)
4444 {
4445         switch (pkt->opcode) {
4446         case PACKET3_NOP:
4447         case PACKET3_SET_BASE:
4448         case PACKET3_SET_CE_DE_COUNTERS:
4449         case PACKET3_LOAD_CONST_RAM:
4450         case PACKET3_WRITE_CONST_RAM:
4451         case PACKET3_WRITE_CONST_RAM_OFFSET:
4452         case PACKET3_DUMP_CONST_RAM:
4453         case PACKET3_INCREMENT_CE_COUNTER:
4454         case PACKET3_WAIT_ON_DE_COUNTER:
4455         case PACKET3_CE_WRITE:
4456                 break;
4457         default:
4458                 DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4459                 return -EINVAL;
4460         }
4461         return 0;
4462 }
4463
4464 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4465 {
4466         u32 start_reg, reg, i;
4467         u32 command = ib[idx + 4];
4468         u32 info = ib[idx + 1];
4469         u32 idx_value = ib[idx];
4470         if (command & PACKET3_CP_DMA_CMD_SAS) {
4471                 /* src address space is register */
4472                 if (((info & 0x60000000) >> 29) == 0) {
4473                         start_reg = idx_value << 2;
4474                         if (command & PACKET3_CP_DMA_CMD_SAIC) {
4475                                 reg = start_reg;
4476                                 if (!si_vm_reg_valid(reg)) {
4477                                         DRM_ERROR("CP DMA Bad SRC register\n");
4478                                         return -EINVAL;
4479                                 }
4480                         } else {
4481                                 for (i = 0; i < (command & 0x1fffff); i++) {
4482                                         reg = start_reg + (4 * i);
4483                                         if (!si_vm_reg_valid(reg)) {
4484                                                 DRM_ERROR("CP DMA Bad SRC register\n");
4485                                                 return -EINVAL;
4486                                         }
4487                                 }
4488                         }
4489                 }
4490         }
4491         if (command & PACKET3_CP_DMA_CMD_DAS) {
4492                 /* dst address space is register */
4493                 if (((info & 0x00300000) >> 20) == 0) {
4494                         start_reg = ib[idx + 2];
4495                         if (command & PACKET3_CP_DMA_CMD_DAIC) {
4496                                 reg = start_reg;
4497                                 if (!si_vm_reg_valid(reg)) {
4498                                         DRM_ERROR("CP DMA Bad DST register\n");
4499                                         return -EINVAL;
4500                                 }
4501                         } else {
4502                                 for (i = 0; i < (command & 0x1fffff); i++) {
4503                                         reg = start_reg + (4 * i);
4504                                 if (!si_vm_reg_valid(reg)) {
4505                                                 DRM_ERROR("CP DMA Bad DST register\n");
4506                                                 return -EINVAL;
4507                                         }
4508                                 }
4509                         }
4510                 }
4511         }
4512         return 0;
4513 }
4514
4515 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4516                                    u32 *ib, struct radeon_cs_packet *pkt)
4517 {
4518         int r;
4519         u32 idx = pkt->idx + 1;
4520         u32 idx_value = ib[idx];
4521         u32 start_reg, end_reg, reg, i;
4522
4523         switch (pkt->opcode) {
4524         case PACKET3_NOP:
4525         case PACKET3_SET_BASE:
4526         case PACKET3_CLEAR_STATE:
4527         case PACKET3_INDEX_BUFFER_SIZE:
4528         case PACKET3_DISPATCH_DIRECT:
4529         case PACKET3_DISPATCH_INDIRECT:
4530         case PACKET3_ALLOC_GDS:
4531         case PACKET3_WRITE_GDS_RAM:
4532         case PACKET3_ATOMIC_GDS:
4533         case PACKET3_ATOMIC:
4534         case PACKET3_OCCLUSION_QUERY:
4535         case PACKET3_SET_PREDICATION:
4536         case PACKET3_COND_EXEC:
4537         case PACKET3_PRED_EXEC:
4538         case PACKET3_DRAW_INDIRECT:
4539         case PACKET3_DRAW_INDEX_INDIRECT:
4540         case PACKET3_INDEX_BASE:
4541         case PACKET3_DRAW_INDEX_2:
4542         case PACKET3_CONTEXT_CONTROL:
4543         case PACKET3_INDEX_TYPE:
4544         case PACKET3_DRAW_INDIRECT_MULTI:
4545         case PACKET3_DRAW_INDEX_AUTO:
4546         case PACKET3_DRAW_INDEX_IMMD:
4547         case PACKET3_NUM_INSTANCES:
4548         case PACKET3_DRAW_INDEX_MULTI_AUTO:
4549         case PACKET3_STRMOUT_BUFFER_UPDATE:
4550         case PACKET3_DRAW_INDEX_OFFSET_2:
4551         case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4552         case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4553         case PACKET3_MPEG_INDEX:
4554         case PACKET3_WAIT_REG_MEM:
4555         case PACKET3_MEM_WRITE:
4556         case PACKET3_PFP_SYNC_ME:
4557         case PACKET3_SURFACE_SYNC:
4558         case PACKET3_EVENT_WRITE:
4559         case PACKET3_EVENT_WRITE_EOP:
4560         case PACKET3_EVENT_WRITE_EOS:
4561         case PACKET3_SET_CONTEXT_REG:
4562         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4563         case PACKET3_SET_SH_REG:
4564         case PACKET3_SET_SH_REG_OFFSET:
4565         case PACKET3_INCREMENT_DE_COUNTER:
4566         case PACKET3_WAIT_ON_CE_COUNTER:
4567         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4568         case PACKET3_ME_WRITE:
4569                 break;
4570         case PACKET3_COPY_DATA:
4571                 if ((idx_value & 0xf00) == 0) {
4572                         reg = ib[idx + 3] * 4;
4573                         if (!si_vm_reg_valid(reg))
4574                                 return -EINVAL;
4575                 }
4576                 break;
4577         case PACKET3_WRITE_DATA:
4578                 if ((idx_value & 0xf00) == 0) {
4579                         start_reg = ib[idx + 1] * 4;
4580                         if (idx_value & 0x10000) {
4581                                 if (!si_vm_reg_valid(start_reg))
4582                                         return -EINVAL;
4583                         } else {
4584                                 for (i = 0; i < (pkt->count - 2); i++) {
4585                                         reg = start_reg + (4 * i);
4586                                         if (!si_vm_reg_valid(reg))
4587                                                 return -EINVAL;
4588                                 }
4589                         }
4590                 }
4591                 break;
4592         case PACKET3_COND_WRITE:
4593                 if (idx_value & 0x100) {
4594                         reg = ib[idx + 5] * 4;
4595                         if (!si_vm_reg_valid(reg))
4596                                 return -EINVAL;
4597                 }
4598                 break;
4599         case PACKET3_COPY_DW:
4600                 if (idx_value & 0x2) {
4601                         reg = ib[idx + 3] * 4;
4602                         if (!si_vm_reg_valid(reg))
4603                                 return -EINVAL;
4604                 }
4605                 break;
4606         case PACKET3_SET_CONFIG_REG:
4607                 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4608                 end_reg = 4 * pkt->count + start_reg - 4;
4609                 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4610                     (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4611                     (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4612                         DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4613                         return -EINVAL;
4614                 }
4615                 for (i = 0; i < pkt->count; i++) {
4616                         reg = start_reg + (4 * i);
4617                         if (!si_vm_reg_valid(reg))
4618                                 return -EINVAL;
4619                 }
4620                 break;
4621         case PACKET3_CP_DMA:
4622                 r = si_vm_packet3_cp_dma_check(ib, idx);
4623                 if (r)
4624                         return r;
4625                 break;
4626         default:
4627                 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4628                 return -EINVAL;
4629         }
4630         return 0;
4631 }
4632
4633 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4634                                        u32 *ib, struct radeon_cs_packet *pkt)
4635 {
4636         int r;
4637         u32 idx = pkt->idx + 1;
4638         u32 idx_value = ib[idx];
4639         u32 start_reg, reg, i;
4640
4641         switch (pkt->opcode) {
4642         case PACKET3_NOP:
4643         case PACKET3_SET_BASE:
4644         case PACKET3_CLEAR_STATE:
4645         case PACKET3_DISPATCH_DIRECT:
4646         case PACKET3_DISPATCH_INDIRECT:
4647         case PACKET3_ALLOC_GDS:
4648         case PACKET3_WRITE_GDS_RAM:
4649         case PACKET3_ATOMIC_GDS:
4650         case PACKET3_ATOMIC:
4651         case PACKET3_OCCLUSION_QUERY:
4652         case PACKET3_SET_PREDICATION:
4653         case PACKET3_COND_EXEC:
4654         case PACKET3_PRED_EXEC:
4655         case PACKET3_CONTEXT_CONTROL:
4656         case PACKET3_STRMOUT_BUFFER_UPDATE:
4657         case PACKET3_WAIT_REG_MEM:
4658         case PACKET3_MEM_WRITE:
4659         case PACKET3_PFP_SYNC_ME:
4660         case PACKET3_SURFACE_SYNC:
4661         case PACKET3_EVENT_WRITE:
4662         case PACKET3_EVENT_WRITE_EOP:
4663         case PACKET3_EVENT_WRITE_EOS:
4664         case PACKET3_SET_CONTEXT_REG:
4665         case PACKET3_SET_CONTEXT_REG_INDIRECT:
4666         case PACKET3_SET_SH_REG:
4667         case PACKET3_SET_SH_REG_OFFSET:
4668         case PACKET3_INCREMENT_DE_COUNTER:
4669         case PACKET3_WAIT_ON_CE_COUNTER:
4670         case PACKET3_WAIT_ON_AVAIL_BUFFER:
4671         case PACKET3_ME_WRITE:
4672                 break;
4673         case PACKET3_COPY_DATA:
4674                 if ((idx_value & 0xf00) == 0) {
4675                         reg = ib[idx + 3] * 4;
4676                         if (!si_vm_reg_valid(reg))
4677                                 return -EINVAL;
4678                 }
4679                 break;
4680         case PACKET3_WRITE_DATA:
4681                 if ((idx_value & 0xf00) == 0) {
4682                         start_reg = ib[idx + 1] * 4;
4683                         if (idx_value & 0x10000) {
4684                                 if (!si_vm_reg_valid(start_reg))
4685                                         return -EINVAL;
4686                         } else {
4687                                 for (i = 0; i < (pkt->count - 2); i++) {
4688                                         reg = start_reg + (4 * i);
4689                                         if (!si_vm_reg_valid(reg))
4690                                                 return -EINVAL;
4691                                 }
4692                         }
4693                 }
4694                 break;
4695         case PACKET3_COND_WRITE:
4696                 if (idx_value & 0x100) {
4697                         reg = ib[idx + 5] * 4;
4698                         if (!si_vm_reg_valid(reg))
4699                                 return -EINVAL;
4700                 }
4701                 break;
4702         case PACKET3_COPY_DW:
4703                 if (idx_value & 0x2) {
4704                         reg = ib[idx + 3] * 4;
4705                         if (!si_vm_reg_valid(reg))
4706                                 return -EINVAL;
4707                 }
4708                 break;
4709         case PACKET3_CP_DMA:
4710                 r = si_vm_packet3_cp_dma_check(ib, idx);
4711                 if (r)
4712                         return r;
4713                 break;
4714         default:
4715                 DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4716                 return -EINVAL;
4717         }
4718         return 0;
4719 }
4720
4721 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4722 {
4723         int ret = 0;
4724         u32 idx = 0, i;
4725         struct radeon_cs_packet pkt;
4726
4727         do {
4728                 pkt.idx = idx;
4729                 pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4730                 pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4731                 pkt.one_reg_wr = 0;
4732                 switch (pkt.type) {
4733                 case RADEON_PACKET_TYPE0:
4734                         dev_err(rdev->dev, "Packet0 not allowed!\n");
4735                         ret = -EINVAL;
4736                         break;
4737                 case RADEON_PACKET_TYPE2:
4738                         idx += 1;
4739                         break;
4740                 case RADEON_PACKET_TYPE3:
4741                         pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4742                         if (ib->is_const_ib)
4743                                 ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4744                         else {
4745                                 switch (ib->ring) {
4746                                 case RADEON_RING_TYPE_GFX_INDEX:
4747                                         ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4748                                         break;
4749                                 case CAYMAN_RING_TYPE_CP1_INDEX:
4750                                 case CAYMAN_RING_TYPE_CP2_INDEX:
4751                                         ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4752                                         break;
4753                                 default:
4754                                         dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4755                                         ret = -EINVAL;
4756                                         break;
4757                                 }
4758                         }
4759                         idx += pkt.count + 2;
4760                         break;
4761                 default:
4762                         dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4763                         ret = -EINVAL;
4764                         break;
4765                 }
4766                 if (ret) {
4767                         for (i = 0; i < ib->length_dw; i++) {
4768                                 if (i == idx)
4769                                         printk("\t0x%08x <---\n", ib->ptr[i]);
4770                                 else
4771                                         printk("\t0x%08x\n", ib->ptr[i]);
4772                         }
4773                         break;
4774                 }
4775         } while (idx < ib->length_dw);
4776
4777         return ret;
4778 }
4779
4780 /*
4781  * vm
4782  */
4783 int si_vm_init(struct radeon_device *rdev)
4784 {
4785         /* number of VMs */
4786         rdev->vm_manager.nvm = 16;
4787         /* base offset of vram pages */
4788         rdev->vm_manager.vram_base_offset = 0;
4789
4790         return 0;
4791 }
4792
/**
 * si_vm_fini - VM manager teardown hook
 *
 * @rdev: radeon_device pointer
 *
 * Intentionally empty: si_vm_init() only assigns plain values, so there is
 * nothing to release here.
 */
void si_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do */
}
4796
4797 /**
4798  * si_vm_decode_fault - print human readable fault info
4799  *
4800  * @rdev: radeon_device pointer
4801  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4802  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4803  *
4804  * Print human readable fault information (SI).
4805  */
4806 static void si_vm_decode_fault(struct radeon_device *rdev,
4807                                u32 status, u32 addr)
4808 {
4809         u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4810         u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4811         u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4812         char *block;
4813
4814         if (rdev->family == CHIP_TAHITI) {
4815                 switch (mc_id) {
4816                 case 160:
4817                 case 144:
4818                 case 96:
4819                 case 80:
4820                 case 224:
4821                 case 208:
4822                 case 32:
4823                 case 16:
4824                         block = "CB";
4825                         break;
4826                 case 161:
4827                 case 145:
4828                 case 97:
4829                 case 81:
4830                 case 225:
4831                 case 209:
4832                 case 33:
4833                 case 17:
4834                         block = "CB_FMASK";
4835                         break;
4836                 case 162:
4837                 case 146:
4838                 case 98:
4839                 case 82:
4840                 case 226:
4841                 case 210:
4842                 case 34:
4843                 case 18:
4844                         block = "CB_CMASK";
4845                         break;
4846                 case 163:
4847                 case 147:
4848                 case 99:
4849                 case 83:
4850                 case 227:
4851                 case 211:
4852                 case 35:
4853                 case 19:
4854                         block = "CB_IMMED";
4855                         break;
4856                 case 164:
4857                 case 148:
4858                 case 100:
4859                 case 84:
4860                 case 228:
4861                 case 212:
4862                 case 36:
4863                 case 20:
4864                         block = "DB";
4865                         break;
4866                 case 165:
4867                 case 149:
4868                 case 101:
4869                 case 85:
4870                 case 229:
4871                 case 213:
4872                 case 37:
4873                 case 21:
4874                         block = "DB_HTILE";
4875                         break;
4876                 case 167:
4877                 case 151:
4878                 case 103:
4879                 case 87:
4880                 case 231:
4881                 case 215:
4882                 case 39:
4883                 case 23:
4884                         block = "DB_STEN";
4885                         break;
4886                 case 72:
4887                 case 68:
4888                 case 64:
4889                 case 8:
4890                 case 4:
4891                 case 0:
4892                 case 136:
4893                 case 132:
4894                 case 128:
4895                 case 200:
4896                 case 196:
4897                 case 192:
4898                         block = "TC";
4899                         break;
4900                 case 112:
4901                 case 48:
4902                         block = "CP";
4903                         break;
4904                 case 49:
4905                 case 177:
4906                 case 50:
4907                 case 178:
4908                         block = "SH";
4909                         break;
4910                 case 53:
4911                 case 190:
4912                         block = "VGT";
4913                         break;
4914                 case 117:
4915                         block = "IH";
4916                         break;
4917                 case 51:
4918                 case 115:
4919                         block = "RLC";
4920                         break;
4921                 case 119:
4922                 case 183:
4923                         block = "DMA0";
4924                         break;
4925                 case 61:
4926                         block = "DMA1";
4927                         break;
4928                 case 248:
4929                 case 120:
4930                         block = "HDP";
4931                         break;
4932                 default:
4933                         block = "unknown";
4934                         break;
4935                 }
4936         } else {
4937                 switch (mc_id) {
4938                 case 32:
4939                 case 16:
4940                 case 96:
4941                 case 80:
4942                 case 160:
4943                 case 144:
4944                 case 224:
4945                 case 208:
4946                         block = "CB";
4947                         break;
4948                 case 33:
4949                 case 17:
4950                 case 97:
4951                 case 81:
4952                 case 161:
4953                 case 145:
4954                 case 225:
4955                 case 209:
4956                         block = "CB_FMASK";
4957                         break;
4958                 case 34:
4959                 case 18:
4960                 case 98:
4961                 case 82:
4962                 case 162:
4963                 case 146:
4964                 case 226:
4965                 case 210:
4966                         block = "CB_CMASK";
4967                         break;
4968                 case 35:
4969                 case 19:
4970                 case 99:
4971                 case 83:
4972                 case 163:
4973                 case 147:
4974                 case 227:
4975                 case 211:
4976                         block = "CB_IMMED";
4977                         break;
4978                 case 36:
4979                 case 20:
4980                 case 100:
4981                 case 84:
4982                 case 164:
4983                 case 148:
4984                 case 228:
4985                 case 212:
4986                         block = "DB";
4987                         break;
4988                 case 37:
4989                 case 21:
4990                 case 101:
4991                 case 85:
4992                 case 165:
4993                 case 149:
4994                 case 229:
4995                 case 213:
4996                         block = "DB_HTILE";
4997                         break;
4998                 case 39:
4999                 case 23:
5000                 case 103:
5001                 case 87:
5002                 case 167:
5003                 case 151:
5004                 case 231:
5005                 case 215:
5006                         block = "DB_STEN";
5007                         break;
5008                 case 72:
5009                 case 68:
5010                 case 8:
5011                 case 4:
5012                 case 136:
5013                 case 132:
5014                 case 200:
5015                 case 196:
5016                         block = "TC";
5017                         break;
5018                 case 112:
5019                 case 48:
5020                         block = "CP";
5021                         break;
5022                 case 49:
5023                 case 177:
5024                 case 50:
5025                 case 178:
5026                         block = "SH";
5027                         break;
5028                 case 53:
5029                         block = "VGT";
5030                         break;
5031                 case 117:
5032                         block = "IH";
5033                         break;
5034                 case 51:
5035                 case 115:
5036                         block = "RLC";
5037                         break;
5038                 case 119:
5039                 case 183:
5040                         block = "DMA0";
5041                         break;
5042                 case 61:
5043                         block = "DMA1";
5044                         break;
5045                 case 248:
5046                 case 120:
5047                         block = "HDP";
5048                         break;
5049                 default:
5050                         block = "unknown";
5051                         break;
5052                 }
5053         }
5054
5055         printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5056                protections, vmid, addr,
5057                (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5058                block, mc_id);
5059 }
5060
5061 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5062                  unsigned vm_id, uint64_t pd_addr)
5063 {
5064         /* write new base address */
5065         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5066         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5067                                  WRITE_DATA_DST_SEL(0)));
5068
5069         if (vm_id < 8) {
5070                 radeon_ring_write(ring,
5071                                   (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5072         } else {
5073                 radeon_ring_write(ring,
5074                                   (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5075         }
5076         radeon_ring_write(ring, 0);
5077         radeon_ring_write(ring, pd_addr >> 12);
5078
5079         /* flush hdp cache */
5080         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5081         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5082                                  WRITE_DATA_DST_SEL(0)));
5083         radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5084         radeon_ring_write(ring, 0);
5085         radeon_ring_write(ring, 0x1);
5086
5087         /* bits 0-15 are the VM contexts0-15 */
5088         radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5089         radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5090                                  WRITE_DATA_DST_SEL(0)));
5091         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5092         radeon_ring_write(ring, 0);
5093         radeon_ring_write(ring, 1 << vm_id);
5094
5095         /* wait for the invalidate to complete */
5096         radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5097         radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5098                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5099         radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5100         radeon_ring_write(ring, 0);
5101         radeon_ring_write(ring, 0); /* ref */
5102         radeon_ring_write(ring, 0); /* mask */
5103         radeon_ring_write(ring, 0x20); /* poll interval */
5104
5105         /* sync PFP to ME, otherwise we might get invalid PFP reads */
5106         radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5107         radeon_ring_write(ring, 0x0);
5108 }
5109
5110 /*
5111  *  Power and clock gating
5112  */
5113 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5114 {
5115         int i;
5116
5117         for (i = 0; i < rdev->usec_timeout; i++) {
5118                 if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5119                         break;
5120                 udelay(1);
5121         }
5122
5123         for (i = 0; i < rdev->usec_timeout; i++) {
5124                 if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5125                         break;
5126                 udelay(1);
5127         }
5128 }
5129
5130 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5131                                          bool enable)
5132 {
5133         u32 tmp = RREG32(CP_INT_CNTL_RING0);
5134         u32 mask;
5135         int i;
5136
5137         if (enable)
5138                 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5139         else
5140                 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5141         WREG32(CP_INT_CNTL_RING0, tmp);
5142
5143         if (!enable) {
5144                 /* read a gfx register */
5145                 tmp = RREG32(DB_DEPTH_INFO);
5146
5147                 mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5148                 for (i = 0; i < rdev->usec_timeout; i++) {
5149                         if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5150                                 break;
5151                         udelay(1);
5152                 }
5153         }
5154 }
5155
5156 static void si_set_uvd_dcm(struct radeon_device *rdev,
5157                            bool sw_mode)
5158 {
5159         u32 tmp, tmp2;
5160
5161         tmp = RREG32(UVD_CGC_CTRL);
5162         tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5163         tmp |= DCM | CG_DT(1) | CLK_OD(4);
5164
5165         if (sw_mode) {
5166                 tmp &= ~0x7ffff800;
5167                 tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5168         } else {
5169                 tmp |= 0x7ffff800;
5170                 tmp2 = 0;
5171         }
5172
5173         WREG32(UVD_CGC_CTRL, tmp);
5174         WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5175 }
5176
5177 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5178 {
5179         bool hw_mode = true;
5180
5181         if (hw_mode) {
5182                 si_set_uvd_dcm(rdev, false);
5183         } else {
5184                 u32 tmp = RREG32(UVD_CGC_CTRL);
5185                 tmp &= ~DCM;
5186                 WREG32(UVD_CGC_CTRL, tmp);
5187         }
5188 }
5189
5190 static u32 si_halt_rlc(struct radeon_device *rdev)
5191 {
5192         u32 data, orig;
5193
5194         orig = data = RREG32(RLC_CNTL);
5195
5196         if (data & RLC_ENABLE) {
5197                 data &= ~RLC_ENABLE;
5198                 WREG32(RLC_CNTL, data);
5199
5200                 si_wait_for_rlc_serdes(rdev);
5201         }
5202
5203         return orig;
5204 }
5205
5206 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5207 {
5208         u32 tmp;
5209
5210         tmp = RREG32(RLC_CNTL);
5211         if (tmp != rlc)
5212                 WREG32(RLC_CNTL, rlc);
5213 }
5214
5215 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5216 {
5217         u32 data, orig;
5218
5219         orig = data = RREG32(DMA_PG);
5220         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5221                 data |= PG_CNTL_ENABLE;
5222         else
5223                 data &= ~PG_CNTL_ENABLE;
5224         if (orig != data)
5225                 WREG32(DMA_PG, data);
5226 }
5227
5228 static void si_init_dma_pg(struct radeon_device *rdev)
5229 {
5230         u32 tmp;
5231
5232         WREG32(DMA_PGFSM_WRITE,  0x00002000);
5233         WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5234
5235         for (tmp = 0; tmp < 5; tmp++)
5236                 WREG32(DMA_PGFSM_WRITE, 0);
5237 }
5238
5239 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5240                                bool enable)
5241 {
5242         u32 tmp;
5243
5244         if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5245                 tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5246                 WREG32(RLC_TTOP_D, tmp);
5247
5248                 tmp = RREG32(RLC_PG_CNTL);
5249                 tmp |= GFX_PG_ENABLE;
5250                 WREG32(RLC_PG_CNTL, tmp);
5251
5252                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5253                 tmp |= AUTO_PG_EN;
5254                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5255         } else {
5256                 tmp = RREG32(RLC_AUTO_PG_CTRL);
5257                 tmp &= ~AUTO_PG_EN;
5258                 WREG32(RLC_AUTO_PG_CTRL, tmp);
5259
5260                 tmp = RREG32(DB_RENDER_CONTROL);
5261         }
5262 }
5263
5264 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5265 {
5266         u32 tmp;
5267
5268         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5269
5270         tmp = RREG32(RLC_PG_CNTL);
5271         tmp |= GFX_PG_SRC;
5272         WREG32(RLC_PG_CNTL, tmp);
5273
5274         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5275
5276         tmp = RREG32(RLC_AUTO_PG_CTRL);
5277
5278         tmp &= ~GRBM_REG_SGIT_MASK;
5279         tmp |= GRBM_REG_SGIT(0x700);
5280         tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5281         WREG32(RLC_AUTO_PG_CTRL, tmp);
5282 }
5283
5284 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5285 {
5286         u32 mask = 0, tmp, tmp1;
5287         int i;
5288
5289         si_select_se_sh(rdev, se, sh);
5290         tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5291         tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5292         si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5293
5294         tmp &= 0xffff0000;
5295
5296         tmp |= tmp1;
5297         tmp >>= 16;
5298
5299         for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5300                 mask <<= 1;
5301                 mask |= 1;
5302         }
5303
5304         return (~tmp) & mask;
5305 }
5306
5307 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5308 {
5309         u32 i, j, k, active_cu_number = 0;
5310         u32 mask, counter, cu_bitmap;
5311         u32 tmp = 0;
5312
5313         for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5314                 for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5315                         mask = 1;
5316                         cu_bitmap = 0;
5317                         counter  = 0;
5318                         for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5319                                 if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5320                                         if (counter < 2)
5321                                                 cu_bitmap |= mask;
5322                                         counter++;
5323                                 }
5324                                 mask <<= 1;
5325                         }
5326
5327                         active_cu_number += counter;
5328                         tmp |= (cu_bitmap << (i * 16 + j * 8));
5329                 }
5330         }
5331
5332         WREG32(RLC_PG_AO_CU_MASK, tmp);
5333
5334         tmp = RREG32(RLC_MAX_PG_CU);
5335         tmp &= ~MAX_PU_CU_MASK;
5336         tmp |= MAX_PU_CU(active_cu_number);
5337         WREG32(RLC_MAX_PG_CU, tmp);
5338 }
5339
5340 static void si_enable_cgcg(struct radeon_device *rdev,
5341                            bool enable)
5342 {
5343         u32 data, orig, tmp;
5344
5345         orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5346
5347         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5348                 si_enable_gui_idle_interrupt(rdev, true);
5349
5350                 WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5351
5352                 tmp = si_halt_rlc(rdev);
5353
5354                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5355                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5356                 WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5357
5358                 si_wait_for_rlc_serdes(rdev);
5359
5360                 si_update_rlc(rdev, tmp);
5361
5362                 WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5363
5364                 data |= CGCG_EN | CGLS_EN;
5365         } else {
5366                 si_enable_gui_idle_interrupt(rdev, false);
5367
5368                 RREG32(CB_CGTT_SCLK_CTRL);
5369                 RREG32(CB_CGTT_SCLK_CTRL);
5370                 RREG32(CB_CGTT_SCLK_CTRL);
5371                 RREG32(CB_CGTT_SCLK_CTRL);
5372
5373                 data &= ~(CGCG_EN | CGLS_EN);
5374         }
5375
5376         if (orig != data)
5377                 WREG32(RLC_CGCG_CGLS_CTRL, data);
5378 }
5379
5380 static void si_enable_mgcg(struct radeon_device *rdev,
5381                            bool enable)
5382 {
5383         u32 data, orig, tmp = 0;
5384
5385         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5386                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5387                 data = 0x96940200;
5388                 if (orig != data)
5389                         WREG32(CGTS_SM_CTRL_REG, data);
5390
5391                 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5392                         orig = data = RREG32(CP_MEM_SLP_CNTL);
5393                         data |= CP_MEM_LS_EN;
5394                         if (orig != data)
5395                                 WREG32(CP_MEM_SLP_CNTL, data);
5396                 }
5397
5398                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5399                 data &= 0xffffffc0;
5400                 if (orig != data)
5401                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5402
5403                 tmp = si_halt_rlc(rdev);
5404
5405                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5406                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5407                 WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5408
5409                 si_update_rlc(rdev, tmp);
5410         } else {
5411                 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5412                 data |= 0x00000003;
5413                 if (orig != data)
5414                         WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5415
5416                 data = RREG32(CP_MEM_SLP_CNTL);
5417                 if (data & CP_MEM_LS_EN) {
5418                         data &= ~CP_MEM_LS_EN;
5419                         WREG32(CP_MEM_SLP_CNTL, data);
5420                 }
5421                 orig = data = RREG32(CGTS_SM_CTRL_REG);
5422                 data |= LS_OVERRIDE | OVERRIDE;
5423                 if (orig != data)
5424                         WREG32(CGTS_SM_CTRL_REG, data);
5425
5426                 tmp = si_halt_rlc(rdev);
5427
5428                 WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5429                 WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5430                 WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5431
5432                 si_update_rlc(rdev, tmp);
5433         }
5434 }
5435
5436 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5437                                bool enable)
5438 {
5439         u32 orig, data, tmp;
5440
5441         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5442                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5443                 tmp |= 0x3fff;
5444                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5445
5446                 orig = data = RREG32(UVD_CGC_CTRL);
5447                 data |= DCM;
5448                 if (orig != data)
5449                         WREG32(UVD_CGC_CTRL, data);
5450
5451                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5452                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5453         } else {
5454                 tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5455                 tmp &= ~0x3fff;
5456                 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5457
5458                 orig = data = RREG32(UVD_CGC_CTRL);
5459                 data &= ~DCM;
5460                 if (orig != data)
5461                         WREG32(UVD_CGC_CTRL, data);
5462
5463                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5464                 WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5465         }
5466 }
5467
/*
 * Registers walked by si_enable_mc_ls() and si_enable_mc_mgcg(), which
 * toggle MC_LS_ENABLE and MC_CG_ENABLE respectively in every entry.
 */
static const u32 mc_cg_registers[] =
{
        MC_HUB_MISC_HUB_CG,
        MC_HUB_MISC_SIP_CG,
        MC_HUB_MISC_VM_CG,
        MC_XPB_CLK_GAT,
        ATC_MISC_CG,
        MC_CITF_MISC_WR_CG,
        MC_CITF_MISC_RD_CG,
        MC_CITF_MISC_VM_CG,
        VM_L2_CG,
};
5480
5481 static void si_enable_mc_ls(struct radeon_device *rdev,
5482                             bool enable)
5483 {
5484         int i;
5485         u32 orig, data;
5486
5487         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5488                 orig = data = RREG32(mc_cg_registers[i]);
5489                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5490                         data |= MC_LS_ENABLE;
5491                 else
5492                         data &= ~MC_LS_ENABLE;
5493                 if (data != orig)
5494                         WREG32(mc_cg_registers[i], data);
5495         }
5496 }
5497
5498 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5499                                bool enable)
5500 {
5501         int i;
5502         u32 orig, data;
5503
5504         for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5505                 orig = data = RREG32(mc_cg_registers[i]);
5506                 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5507                         data |= MC_CG_ENABLE;
5508                 else
5509                         data &= ~MC_CG_ENABLE;
5510                 if (data != orig)
5511                         WREG32(mc_cg_registers[i], data);
5512         }
5513 }
5514
5515 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5516                                bool enable)
5517 {
5518         u32 orig, data, offset;
5519         int i;
5520
5521         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5522                 for (i = 0; i < 2; i++) {
5523                         if (i == 0)
5524                                 offset = DMA0_REGISTER_OFFSET;
5525                         else
5526                                 offset = DMA1_REGISTER_OFFSET;
5527                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5528                         data &= ~MEM_POWER_OVERRIDE;
5529                         if (data != orig)
5530                                 WREG32(DMA_POWER_CNTL + offset, data);
5531                         WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5532                 }
5533         } else {
5534                 for (i = 0; i < 2; i++) {
5535                         if (i == 0)
5536                                 offset = DMA0_REGISTER_OFFSET;
5537                         else
5538                                 offset = DMA1_REGISTER_OFFSET;
5539                         orig = data = RREG32(DMA_POWER_CNTL + offset);
5540                         data |= MEM_POWER_OVERRIDE;
5541                         if (data != orig)
5542                                 WREG32(DMA_POWER_CNTL + offset, data);
5543
5544                         orig = data = RREG32(DMA_CLK_CTRL + offset);
5545                         data = 0xff000000;
5546                         if (data != orig)
5547                                 WREG32(DMA_CLK_CTRL + offset, data);
5548                 }
5549         }
5550 }
5551
5552 static void si_enable_bif_mgls(struct radeon_device *rdev,
5553                                bool enable)
5554 {
5555         u32 orig, data;
5556
5557         orig = data = RREG32_PCIE(PCIE_CNTL2);
5558
5559         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5560                 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5561                         REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5562         else
5563                 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5564                           REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5565
5566         if (orig != data)
5567                 WREG32_PCIE(PCIE_CNTL2, data);
5568 }
5569
5570 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5571                                bool enable)
5572 {
5573         u32 orig, data;
5574
5575         orig = data = RREG32(HDP_HOST_PATH_CNTL);
5576
5577         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5578                 data &= ~CLOCK_GATING_DIS;
5579         else
5580                 data |= CLOCK_GATING_DIS;
5581
5582         if (orig != data)
5583                 WREG32(HDP_HOST_PATH_CNTL, data);
5584 }
5585
5586 static void si_enable_hdp_ls(struct radeon_device *rdev,
5587                              bool enable)
5588 {
5589         u32 orig, data;
5590
5591         orig = data = RREG32(HDP_MEM_POWER_LS);
5592
5593         if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5594                 data |= HDP_LS_ENABLE;
5595         else
5596                 data &= ~HDP_LS_ENABLE;
5597
5598         if (orig != data)
5599                 WREG32(HDP_MEM_POWER_LS, data);
5600 }
5601
5602 static void si_update_cg(struct radeon_device *rdev,
5603                          u32 block, bool enable)
5604 {
5605         if (block & RADEON_CG_BLOCK_GFX) {
5606                 si_enable_gui_idle_interrupt(rdev, false);
5607                 /* order matters! */
5608                 if (enable) {
5609                         si_enable_mgcg(rdev, true);
5610                         si_enable_cgcg(rdev, true);
5611                 } else {
5612                         si_enable_cgcg(rdev, false);
5613                         si_enable_mgcg(rdev, false);
5614                 }
5615                 si_enable_gui_idle_interrupt(rdev, true);
5616         }
5617
5618         if (block & RADEON_CG_BLOCK_MC) {
5619                 si_enable_mc_mgcg(rdev, enable);
5620                 si_enable_mc_ls(rdev, enable);
5621         }
5622
5623         if (block & RADEON_CG_BLOCK_SDMA) {
5624                 si_enable_dma_mgcg(rdev, enable);
5625         }
5626
5627         if (block & RADEON_CG_BLOCK_BIF) {
5628                 si_enable_bif_mgls(rdev, enable);
5629         }
5630
5631         if (block & RADEON_CG_BLOCK_UVD) {
5632                 if (rdev->has_uvd) {
5633                         si_enable_uvd_mgcg(rdev, enable);
5634                 }
5635         }
5636
5637         if (block & RADEON_CG_BLOCK_HDP) {
5638                 si_enable_hdp_mgcg(rdev, enable);
5639                 si_enable_hdp_ls(rdev, enable);
5640         }
5641 }
5642
5643 static void si_init_cg(struct radeon_device *rdev)
5644 {
5645         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5646                             RADEON_CG_BLOCK_MC |
5647                             RADEON_CG_BLOCK_SDMA |
5648                             RADEON_CG_BLOCK_BIF |
5649                             RADEON_CG_BLOCK_HDP), true);
5650         if (rdev->has_uvd) {
5651                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5652                 si_init_uvd_internal_cg(rdev);
5653         }
5654 }
5655
5656 static void si_fini_cg(struct radeon_device *rdev)
5657 {
5658         if (rdev->has_uvd) {
5659                 si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5660         }
5661         si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5662                             RADEON_CG_BLOCK_MC |
5663                             RADEON_CG_BLOCK_SDMA |
5664                             RADEON_CG_BLOCK_BIF |
5665                             RADEON_CG_BLOCK_HDP), false);
5666 }
5667
5668 u32 si_get_csb_size(struct radeon_device *rdev)
5669 {
5670         u32 count = 0;
5671         const struct cs_section_def *sect = NULL;
5672         const struct cs_extent_def *ext = NULL;
5673
5674         if (rdev->rlc.cs_data == NULL)
5675                 return 0;
5676
5677         /* begin clear state */
5678         count += 2;
5679         /* context control state */
5680         count += 3;
5681
5682         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5683                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5684                         if (sect->id == SECT_CONTEXT)
5685                                 count += 2 + ext->reg_count;
5686                         else
5687                                 return 0;
5688                 }
5689         }
5690         /* pa_sc_raster_config */
5691         count += 3;
5692         /* end clear state */
5693         count += 2;
5694         /* clear state */
5695         count += 2;
5696
5697         return count;
5698 }
5699
5700 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5701 {
5702         u32 count = 0, i;
5703         const struct cs_section_def *sect = NULL;
5704         const struct cs_extent_def *ext = NULL;
5705
5706         if (rdev->rlc.cs_data == NULL)
5707                 return;
5708         if (buffer == NULL)
5709                 return;
5710
5711         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5712         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5713
5714         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5715         buffer[count++] = cpu_to_le32(0x80000000);
5716         buffer[count++] = cpu_to_le32(0x80000000);
5717
5718         for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5719                 for (ext = sect->section; ext->extent != NULL; ++ext) {
5720                         if (sect->id == SECT_CONTEXT) {
5721                                 buffer[count++] =
5722                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5723                                 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5724                                 for (i = 0; i < ext->reg_count; i++)
5725                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
5726                         } else {
5727                                 return;
5728                         }
5729                 }
5730         }
5731
5732         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5733         buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5734         switch (rdev->family) {
5735         case CHIP_TAHITI:
5736         case CHIP_PITCAIRN:
5737                 buffer[count++] = cpu_to_le32(0x2a00126a);
5738                 break;
5739         case CHIP_VERDE:
5740                 buffer[count++] = cpu_to_le32(0x0000124a);
5741                 break;
5742         case CHIP_OLAND:
5743                 buffer[count++] = cpu_to_le32(0x00000082);
5744                 break;
5745         case CHIP_HAINAN:
5746                 buffer[count++] = cpu_to_le32(0x00000000);
5747                 break;
5748         default:
5749                 buffer[count++] = cpu_to_le32(0x00000000);
5750                 break;
5751         }
5752
5753         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5754         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5755
5756         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5757         buffer[count++] = cpu_to_le32(0);
5758 }
5759
5760 static void si_init_pg(struct radeon_device *rdev)
5761 {
5762         if (rdev->pg_flags) {
5763                 if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5764                         si_init_dma_pg(rdev);
5765                 }
5766                 si_init_ao_cu_mask(rdev);
5767                 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5768                         si_init_gfx_cgpg(rdev);
5769                 } else {
5770                         WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5771                         WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5772                 }
5773                 si_enable_dma_pg(rdev, true);
5774                 si_enable_gfx_cgpg(rdev, true);
5775         } else {
5776                 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5777                 WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5778         }
5779 }
5780
5781 static void si_fini_pg(struct radeon_device *rdev)
5782 {
5783         if (rdev->pg_flags) {
5784                 si_enable_dma_pg(rdev, false);
5785                 si_enable_gfx_cgpg(rdev, false);
5786         }
5787 }
5788
5789 /*
5790  * RLC
5791  */
5792 void si_rlc_reset(struct radeon_device *rdev)
5793 {
5794         u32 tmp = RREG32(GRBM_SOFT_RESET);
5795
5796         tmp |= SOFT_RESET_RLC;
5797         WREG32(GRBM_SOFT_RESET, tmp);
5798         udelay(50);
5799         tmp &= ~SOFT_RESET_RLC;
5800         WREG32(GRBM_SOFT_RESET, tmp);
5801         udelay(50);
5802 }
5803
/*
 * Stop the RLC: zero RLC_CNTL, disable the gui idle interrupt and wait for
 * the RLC serdes busy registers to clear.
 */
static void si_rlc_stop(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, 0);

        si_enable_gui_idle_interrupt(rdev, false);

        si_wait_for_rlc_serdes(rdev);
}
5812
/*
 * Start the RLC: write RLC_ENABLE to RLC_CNTL (all other bits zero),
 * enable the gui idle interrupt and delay 50us.
 */
static void si_rlc_start(struct radeon_device *rdev)
{
        WREG32(RLC_CNTL, RLC_ENABLE);

        si_enable_gui_idle_interrupt(rdev, true);

        udelay(50);
}
5821
5822 static bool si_lbpw_supported(struct radeon_device *rdev)
5823 {
5824         u32 tmp;
5825
5826         /* Enable LBPW only for DDR3 */
5827         tmp = RREG32(MC_SEQ_MISC0);
5828         if ((tmp & 0xF0000000) == 0xB0000000)
5829                 return true;
5830         return false;
5831 }
5832
5833 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5834 {
5835         u32 tmp;
5836
5837         tmp = RREG32(RLC_LB_CNTL);
5838         if (enable)
5839                 tmp |= LOAD_BALANCE_ENABLE;
5840         else
5841                 tmp &= ~LOAD_BALANCE_ENABLE;
5842         WREG32(RLC_LB_CNTL, tmp);
5843
5844         if (!enable) {
5845                 si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5846                 WREG32(SPI_LB_CU_MASK, 0x00ff);
5847         }
5848 }
5849
5850 static int si_rlc_resume(struct radeon_device *rdev)
5851 {
5852         u32 i;
5853
5854         if (!rdev->rlc_fw)
5855                 return -EINVAL;
5856
5857         si_rlc_stop(rdev);
5858
5859         si_rlc_reset(rdev);
5860
5861         si_init_pg(rdev);
5862
5863         si_init_cg(rdev);
5864
5865         WREG32(RLC_RL_BASE, 0);
5866         WREG32(RLC_RL_SIZE, 0);
5867         WREG32(RLC_LB_CNTL, 0);
5868         WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5869         WREG32(RLC_LB_CNTR_INIT, 0);
5870         WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5871
5872         WREG32(RLC_MC_CNTL, 0);
5873         WREG32(RLC_UCODE_CNTL, 0);
5874
5875         if (rdev->new_fw) {
5876                 const struct rlc_firmware_header_v1_0 *hdr =
5877                         (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5878                 u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5879                 const __le32 *fw_data = (const __le32 *)
5880                         (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5881
5882                 radeon_ucode_print_rlc_hdr(&hdr->header);
5883
5884                 for (i = 0; i < fw_size; i++) {
5885                         WREG32(RLC_UCODE_ADDR, i);
5886                         WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5887                 }
5888         } else {
5889                 const __be32 *fw_data =
5890                         (const __be32 *)rdev->rlc_fw->data;
5891                 for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5892                         WREG32(RLC_UCODE_ADDR, i);
5893                         WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5894                 }
5895         }
5896         WREG32(RLC_UCODE_ADDR, 0);
5897
5898         si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5899
5900         si_rlc_start(rdev);
5901
5902         return 0;
5903 }
5904
5905 static void si_enable_interrupts(struct radeon_device *rdev)
5906 {
5907         u32 ih_cntl = RREG32(IH_CNTL);
5908         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5909
5910         ih_cntl |= ENABLE_INTR;
5911         ih_rb_cntl |= IH_RB_ENABLE;
5912         WREG32(IH_CNTL, ih_cntl);
5913         WREG32(IH_RB_CNTL, ih_rb_cntl);
5914         rdev->ih.enabled = true;
5915 }
5916
5917 static void si_disable_interrupts(struct radeon_device *rdev)
5918 {
5919         u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5920         u32 ih_cntl = RREG32(IH_CNTL);
5921
5922         ih_rb_cntl &= ~IH_RB_ENABLE;
5923         ih_cntl &= ~ENABLE_INTR;
5924         WREG32(IH_RB_CNTL, ih_rb_cntl);
5925         WREG32(IH_CNTL, ih_cntl);
5926         /* set rptr, wptr to 0 */
5927         WREG32(IH_RB_RPTR, 0);
5928         WREG32(IH_RB_WPTR, 0);
5929         rdev->ih.enabled = false;
5930         rdev->ih.rptr = 0;
5931 }
5932
5933 static void si_disable_interrupt_state(struct radeon_device *rdev)
5934 {
5935         u32 tmp;
5936
5937         tmp = RREG32(CP_INT_CNTL_RING0) &
5938                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5939         WREG32(CP_INT_CNTL_RING0, tmp);
5940         WREG32(CP_INT_CNTL_RING1, 0);
5941         WREG32(CP_INT_CNTL_RING2, 0);
5942         tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5943         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5944         tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5945         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5946         WREG32(GRBM_INT_CNTL, 0);
5947         WREG32(SRBM_INT_CNTL, 0);
5948         if (rdev->num_crtc >= 2) {
5949                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5950                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5951         }
5952         if (rdev->num_crtc >= 4) {
5953                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5954                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5955         }
5956         if (rdev->num_crtc >= 6) {
5957                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5958                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5959         }
5960
5961         if (rdev->num_crtc >= 2) {
5962                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5963                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5964         }
5965         if (rdev->num_crtc >= 4) {
5966                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5967                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5968         }
5969         if (rdev->num_crtc >= 6) {
5970                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5971                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5972         }
5973
5974         if (!ASIC_IS_NODCE(rdev)) {
5975                 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5976
5977                 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5978                 WREG32(DC_HPD1_INT_CONTROL, tmp);
5979                 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5980                 WREG32(DC_HPD2_INT_CONTROL, tmp);
5981                 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5982                 WREG32(DC_HPD3_INT_CONTROL, tmp);
5983                 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5984                 WREG32(DC_HPD4_INT_CONTROL, tmp);
5985                 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5986                 WREG32(DC_HPD5_INT_CONTROL, tmp);
5987                 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5988                 WREG32(DC_HPD6_INT_CONTROL, tmp);
5989         }
5990 }
5991
5992 static int si_irq_init(struct radeon_device *rdev)
5993 {
5994         int ret = 0;
5995         int rb_bufsz;
5996         u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5997
5998         /* allocate ring */
5999         ret = r600_ih_ring_alloc(rdev);
6000         if (ret)
6001                 return ret;
6002
6003         /* disable irqs */
6004         si_disable_interrupts(rdev);
6005
6006         /* init rlc */
6007         ret = si_rlc_resume(rdev);
6008         if (ret) {
6009                 r600_ih_ring_fini(rdev);
6010                 return ret;
6011         }
6012
6013         /* setup interrupt control */
6014         /* set dummy read address to ring address */
6015         WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6016         interrupt_cntl = RREG32(INTERRUPT_CNTL);
6017         /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6018          * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6019          */
6020         interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6021         /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6022         interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6023         WREG32(INTERRUPT_CNTL, interrupt_cntl);
6024
6025         WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6026         rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6027
6028         ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6029                       IH_WPTR_OVERFLOW_CLEAR |
6030                       (rb_bufsz << 1));
6031
6032         if (rdev->wb.enabled)
6033                 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6034
6035         /* set the writeback address whether it's enabled or not */
6036         WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6037         WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6038
6039         WREG32(IH_RB_CNTL, ih_rb_cntl);
6040
6041         /* set rptr, wptr to 0 */
6042         WREG32(IH_RB_RPTR, 0);
6043         WREG32(IH_RB_WPTR, 0);
6044
6045         /* Default settings for IH_CNTL (disabled at first) */
6046         ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6047         /* RPTR_REARM only works if msi's are enabled */
6048         if (rdev->msi_enabled)
6049                 ih_cntl |= RPTR_REARM;
6050         WREG32(IH_CNTL, ih_cntl);
6051
6052         /* force the active interrupt state to all disabled */
6053         si_disable_interrupt_state(rdev);
6054
6055         pci_set_master(rdev->pdev);
6056
6057         /* enable irqs */
6058         si_enable_interrupts(rdev);
6059
6060         return ret;
6061 }
6062
/**
 * si_irq_set - program the interrupt enable registers from rdev->irq
 * @rdev: radeon_device pointer
 *
 * Translates the interrupt requests recorded in rdev->irq (ring_int[],
 * crtc_vblank_int[], pflip[], hpd[] and dpm_thermal) into values for the
 * CP, DMA, GRBM, CRTC, HPD and thermal interrupt control registers and
 * writes them out.
 *
 * Returns -EINVAL (after a WARN) if no interrupt handler is installed.
 * Returns 0 otherwise; if the IH is disabled, interrupts are disabled and
 * the interrupt state is forced to all-disabled instead of being programmed.
 */
6063 int si_irq_set(struct radeon_device *rdev)
6064 {
6065         u32 cp_int_cntl;
6066         u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6067         u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6068         u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6069         u32 grbm_int_cntl = 0;
6070         u32 dma_cntl, dma_cntl1;
6071         u32 thermal_int = 0;
6072
6073         if (!rdev->irq.installed) {
6074                 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6075                 return -EINVAL;
6076         }
6077         /* don't enable anything if the ih is disabled */
6078         if (!rdev->ih.enabled) {
6079                 si_disable_interrupts(rdev);
6080                 /* force the active interrupt state to all disabled */
6081                 si_disable_interrupt_state(rdev);
6082                 return 0;
6083         }
6084
             /* ring 0: keep only the current context busy/empty enable bits */
6085         cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6086                 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6087
             /* HPD: start from the current register values with both enables cleared */
6088         if (!ASIC_IS_NODCE(rdev)) {
6089                 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6090                 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6091                 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6092                 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6093                 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6094                 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6095         }
6096
             /* DMA engines: current values with TRAP_ENABLE cleared */
6097         dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6098         dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6099
             /* thermal: current value with the high/low mask bits cleared */
6100         thermal_int = RREG32(CG_THERMAL_INT) &
6101                 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6102
6103         /* enable CP interrupts on all rings */
6104         if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6105                 DRM_DEBUG("si_irq_set: sw int gfx\n");
6106                 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6107         }
6108         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6109                 DRM_DEBUG("si_irq_set: sw int cp1\n");
6110                 cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6111         }
6112         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6113                 DRM_DEBUG("si_irq_set: sw int cp2\n");
6114                 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6115         }
6116         if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6117                 DRM_DEBUG("si_irq_set: sw int dma\n");
6118                 dma_cntl |= TRAP_ENABLE;
6119         }
6120
6121         if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6122                 DRM_DEBUG("si_irq_set: sw int dma1\n");
6123                 dma_cntl1 |= TRAP_ENABLE;
6124         }
             /* vblank is requested either directly or by a pending page flip */
6125         if (rdev->irq.crtc_vblank_int[0] ||
6126             atomic_read(&rdev->irq.pflip[0])) {
6127                 DRM_DEBUG("si_irq_set: vblank 0\n");
6128                 crtc1 |= VBLANK_INT_MASK;
6129         }
6130         if (rdev->irq.crtc_vblank_int[1] ||
6131             atomic_read(&rdev->irq.pflip[1])) {
6132                 DRM_DEBUG("si_irq_set: vblank 1\n");
6133                 crtc2 |= VBLANK_INT_MASK;
6134         }
6135         if (rdev->irq.crtc_vblank_int[2] ||
6136             atomic_read(&rdev->irq.pflip[2])) {
6137                 DRM_DEBUG("si_irq_set: vblank 2\n");
6138                 crtc3 |= VBLANK_INT_MASK;
6139         }
6140         if (rdev->irq.crtc_vblank_int[3] ||
6141             atomic_read(&rdev->irq.pflip[3])) {
6142                 DRM_DEBUG("si_irq_set: vblank 3\n");
6143                 crtc4 |= VBLANK_INT_MASK;
6144         }
6145         if (rdev->irq.crtc_vblank_int[4] ||
6146             atomic_read(&rdev->irq.pflip[4])) {
6147                 DRM_DEBUG("si_irq_set: vblank 4\n");
6148                 crtc5 |= VBLANK_INT_MASK;
6149         }
6150         if (rdev->irq.crtc_vblank_int[5] ||
6151             atomic_read(&rdev->irq.pflip[5])) {
6152                 DRM_DEBUG("si_irq_set: vblank 5\n");
6153                 crtc6 |= VBLANK_INT_MASK;
6154         }
             /* each requested HPD pin gets both its INT and RX_INT enables set */
6155         if (rdev->irq.hpd[0]) {
6156                 DRM_DEBUG("si_irq_set: hpd 1\n");
6157                 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6158         }
6159         if (rdev->irq.hpd[1]) {
6160                 DRM_DEBUG("si_irq_set: hpd 2\n");
6161                 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6162         }
6163         if (rdev->irq.hpd[2]) {
6164                 DRM_DEBUG("si_irq_set: hpd 3\n");
6165                 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6166         }
6167         if (rdev->irq.hpd[3]) {
6168                 DRM_DEBUG("si_irq_set: hpd 4\n");
6169                 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6170         }
6171         if (rdev->irq.hpd[4]) {
6172                 DRM_DEBUG("si_irq_set: hpd 5\n");
6173                 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6174         }
6175         if (rdev->irq.hpd[5]) {
6176                 DRM_DEBUG("si_irq_set: hpd 6\n");
6177                 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6178         }
6179
             /* all values are computed; write them out to the hardware */
6180         WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6181         WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6182         WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6183
6184         WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6185         WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6186
             /* grbm_int_cntl is never modified in this function, so this writes 0 */
6187         WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6188
6189         if (rdev->irq.dpm_thermal) {
6190                 DRM_DEBUG("dpm thermal\n");
6191                 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6192         }
6193
             /* CRTC registers are only touched in pairs that exist on this asic */
6194         if (rdev->num_crtc >= 2) {
6195                 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6196                 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6197         }
6198         if (rdev->num_crtc >= 4) {
6199                 WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6200                 WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6201         }
6202         if (rdev->num_crtc >= 6) {
6203                 WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6204                 WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6205         }
6206
             /* GRPH_PFLIP_INT_MASK is written unconditionally, independent of irq.pflip[] */
6207         if (rdev->num_crtc >= 2) {
6208                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6209                        GRPH_PFLIP_INT_MASK);
6210                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6211                        GRPH_PFLIP_INT_MASK);
6212         }
6213         if (rdev->num_crtc >= 4) {
6214                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6215                        GRPH_PFLIP_INT_MASK);
6216                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6217                        GRPH_PFLIP_INT_MASK);
6218         }
6219         if (rdev->num_crtc >= 6) {
6220                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6221                        GRPH_PFLIP_INT_MASK);
6222                 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6223                        GRPH_PFLIP_INT_MASK);
6224         }
6225
6226         if (!ASIC_IS_NODCE(rdev)) {
6227                 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6228                 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6229                 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6230                 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6231                 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6232                 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6233         }
6234
6235         WREG32(CG_THERMAL_INT, thermal_int);
6236
6237         /* posting read */
6238         RREG32(SRBM_STATUS);
6239
6240         return 0;
6241 }
6242
6243 static inline void si_irq_ack(struct radeon_device *rdev)
6244 {
6245         u32 tmp;
6246
6247         if (ASIC_IS_NODCE(rdev))
6248                 return;
6249
6250         rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6251         rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6252         rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6253         rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6254         rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6255         rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6256         rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6257         rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6258         if (rdev->num_crtc >= 4) {
6259                 rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6260                 rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6261         }
6262         if (rdev->num_crtc >= 6) {
6263                 rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6264                 rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6265         }
6266
6267         if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6268                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6269         if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6270                 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6271         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6272                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6273         if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6274                 WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6275         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6276                 WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6277         if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6278                 WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6279
6280         if (rdev->num_crtc >= 4) {
6281                 if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6282                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6283                 if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6284                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6285                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6286                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6287                 if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6288                         WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6289                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6290                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6291                 if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6292                         WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6293         }
6294
6295         if (rdev->num_crtc >= 6) {
6296                 if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6297                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6298                 if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6299                         WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6300                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6301                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6302                 if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6303                         WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6304                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6305                         WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6306                 if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6307                         WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6308         }
6309
6310         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6311                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6312                 tmp |= DC_HPDx_INT_ACK;
6313                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6314         }
6315         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6316                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6317                 tmp |= DC_HPDx_INT_ACK;
6318                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6319         }
6320         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6321                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6322                 tmp |= DC_HPDx_INT_ACK;
6323                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6324         }
6325         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6326                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6327                 tmp |= DC_HPDx_INT_ACK;
6328                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6329         }
6330         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6331                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6332                 tmp |= DC_HPDx_INT_ACK;
6333                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6334         }
6335         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6336                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6337                 tmp |= DC_HPDx_INT_ACK;
6338                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6339         }
6340
6341         if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6342                 tmp = RREG32(DC_HPD1_INT_CONTROL);
6343                 tmp |= DC_HPDx_RX_INT_ACK;
6344                 WREG32(DC_HPD1_INT_CONTROL, tmp);
6345         }
6346         if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6347                 tmp = RREG32(DC_HPD2_INT_CONTROL);
6348                 tmp |= DC_HPDx_RX_INT_ACK;
6349                 WREG32(DC_HPD2_INT_CONTROL, tmp);
6350         }
6351         if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6352                 tmp = RREG32(DC_HPD3_INT_CONTROL);
6353                 tmp |= DC_HPDx_RX_INT_ACK;
6354                 WREG32(DC_HPD3_INT_CONTROL, tmp);
6355         }
6356         if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6357                 tmp = RREG32(DC_HPD4_INT_CONTROL);
6358                 tmp |= DC_HPDx_RX_INT_ACK;
6359                 WREG32(DC_HPD4_INT_CONTROL, tmp);
6360         }
6361         if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6362                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6363                 tmp |= DC_HPDx_RX_INT_ACK;
6364                 WREG32(DC_HPD5_INT_CONTROL, tmp);
6365         }
6366         if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6367                 tmp = RREG32(DC_HPD5_INT_CONTROL);
6368                 tmp |= DC_HPDx_RX_INT_ACK;
6369                 WREG32(DC_HPD6_INT_CONTROL, tmp);
6370         }
6371 }
6372
/**
 * si_irq_disable - turn interrupts off and acknowledge what is pending
 * @rdev: radeon_device pointer
 *
 * Disables interrupts, waits 1 ms, acknowledges pending display
 * interrupts via si_irq_ack() and then forces the interrupt state to
 * all-disabled via si_disable_interrupt_state().
 */
6373 static void si_irq_disable(struct radeon_device *rdev)
6374 {
6375         si_disable_interrupts(rdev);
6376         /* Wait and acknowledge irq */
6377         mdelay(1);
6378         si_irq_ack(rdev);
6379         si_disable_interrupt_state(rdev);
6380 }
6381
/**
 * si_irq_suspend - disable interrupts and stop the RLC
 * @rdev: radeon_device pointer
 *
 * Calls si_irq_disable() followed by si_rlc_stop().
 */
6382 static void si_irq_suspend(struct radeon_device *rdev)
6383 {
6384         si_irq_disable(rdev);
6385         si_rlc_stop(rdev);
6386 }
6387
/**
 * si_irq_fini - tear down interrupt handling
 * @rdev: radeon_device pointer
 *
 * Suspends interrupt handling via si_irq_suspend() and then releases the
 * IH ring with r600_ih_ring_fini().
 */
6388 static void si_irq_fini(struct radeon_device *rdev)
6389 {
6390         si_irq_suspend(rdev);
6391         r600_ih_ring_fini(rdev);
6392 }
6393
6394 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6395 {
6396         u32 wptr, tmp;
6397
6398         if (rdev->wb.enabled)
6399                 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6400         else
6401                 wptr = RREG32(IH_RB_WPTR);
6402
6403         if (wptr & RB_OVERFLOW) {
6404                 wptr &= ~RB_OVERFLOW;
6405                 /* When a ring buffer overflow happen start parsing interrupt
6406                  * from the last not overwritten vector (wptr + 16). Hopefully
6407                  * this should allow us to catchup.
6408                  */
6409                 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6410                          wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6411                 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6412                 tmp = RREG32(IH_RB_CNTL);
6413                 tmp |= IH_WPTR_OVERFLOW_CLEAR;
6414                 WREG32(IH_RB_CNTL, tmp);
6415         }
6416         return (wptr & rdev->ih.ptr_mask);
6417 }
6418
6419 /*        SI IV Ring
6420  * Each IV ring entry is 128 bits:
6421  * [7:0]    - interrupt source id
6422  * [31:8]   - reserved
6423  * [59:32]  - interrupt source data
6424  * [63:60]  - reserved
6425  * [71:64]  - RINGID
6426  * [79:72]  - VMID
6427  * [127:80] - reserved
6428  */
6429 int si_irq_process(struct radeon_device *rdev)
6430 {
6431         u32 wptr;
6432         u32 rptr;
6433         u32 src_id, src_data, ring_id;
6434         u32 ring_index;
6435         bool queue_hotplug = false;
6436         bool queue_dp = false;
6437         bool queue_thermal = false;
6438         u32 status, addr;
6439
6440         if (!rdev->ih.enabled || rdev->shutdown)
6441                 return IRQ_NONE;
6442
6443         wptr = si_get_ih_wptr(rdev);
6444
6445 restart_ih:
6446         /* is somebody else already processing irqs? */
6447         if (atomic_xchg(&rdev->ih.lock, 1))
6448                 return IRQ_NONE;
6449
6450         rptr = rdev->ih.rptr;
6451         DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6452
6453         /* Order reading of wptr vs. reading of IH ring data */
6454         rmb();
6455
6456         /* display interrupts */
6457         si_irq_ack(rdev);
6458
6459         while (rptr != wptr) {
6460                 /* wptr/rptr are in bytes! */
6461                 ring_index = rptr / 4;
6462                 src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6463                 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6464                 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6465
6466                 switch (src_id) {
6467                 case 1: /* D1 vblank/vline */
6468                         switch (src_data) {
6469                         case 0: /* D1 vblank */
6470                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6471                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6472
6473                                 if (rdev->irq.crtc_vblank_int[0]) {
6474                                         drm_handle_vblank(rdev->ddev, 0);
6475                                         rdev->pm.vblank_sync = true;
6476                                         wake_up(&rdev->irq.vblank_queue);
6477                                 }
6478                                 if (atomic_read(&rdev->irq.pflip[0]))
6479                                         radeon_crtc_handle_vblank(rdev, 0);
6480                                 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6481                                 DRM_DEBUG("IH: D1 vblank\n");
6482
6483                                 break;
6484                         case 1: /* D1 vline */
6485                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6486                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6487
6488                                 rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6489                                 DRM_DEBUG("IH: D1 vline\n");
6490
6491                                 break;
6492                         default:
6493                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6494                                 break;
6495                         }
6496                         break;
6497                 case 2: /* D2 vblank/vline */
6498                         switch (src_data) {
6499                         case 0: /* D2 vblank */
6500                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6501                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6502
6503                                 if (rdev->irq.crtc_vblank_int[1]) {
6504                                         drm_handle_vblank(rdev->ddev, 1);
6505                                         rdev->pm.vblank_sync = true;
6506                                         wake_up(&rdev->irq.vblank_queue);
6507                                 }
6508                                 if (atomic_read(&rdev->irq.pflip[1]))
6509                                         radeon_crtc_handle_vblank(rdev, 1);
6510                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6511                                 DRM_DEBUG("IH: D2 vblank\n");
6512
6513                                 break;
6514                         case 1: /* D2 vline */
6515                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6516                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6517
6518                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6519                                 DRM_DEBUG("IH: D2 vline\n");
6520
6521                                 break;
6522                         default:
6523                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6524                                 break;
6525                         }
6526                         break;
6527                 case 3: /* D3 vblank/vline */
6528                         switch (src_data) {
6529                         case 0: /* D3 vblank */
6530                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6531                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6532
6533                                 if (rdev->irq.crtc_vblank_int[2]) {
6534                                         drm_handle_vblank(rdev->ddev, 2);
6535                                         rdev->pm.vblank_sync = true;
6536                                         wake_up(&rdev->irq.vblank_queue);
6537                                 }
6538                                 if (atomic_read(&rdev->irq.pflip[2]))
6539                                         radeon_crtc_handle_vblank(rdev, 2);
6540                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6541                                 DRM_DEBUG("IH: D3 vblank\n");
6542
6543                                 break;
6544                         case 1: /* D3 vline */
6545                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6546                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6547
6548                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6549                                 DRM_DEBUG("IH: D3 vline\n");
6550
6551                                 break;
6552                         default:
6553                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6554                                 break;
6555                         }
6556                         break;
6557                 case 4: /* D4 vblank/vline */
6558                         switch (src_data) {
6559                         case 0: /* D4 vblank */
6560                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6561                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6562
6563                                 if (rdev->irq.crtc_vblank_int[3]) {
6564                                         drm_handle_vblank(rdev->ddev, 3);
6565                                         rdev->pm.vblank_sync = true;
6566                                         wake_up(&rdev->irq.vblank_queue);
6567                                 }
6568                                 if (atomic_read(&rdev->irq.pflip[3]))
6569                                         radeon_crtc_handle_vblank(rdev, 3);
6570                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6571                                 DRM_DEBUG("IH: D4 vblank\n");
6572
6573                                 break;
6574                         case 1: /* D4 vline */
6575                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6576                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6577
6578                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6579                                 DRM_DEBUG("IH: D4 vline\n");
6580
6581                                 break;
6582                         default:
6583                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6584                                 break;
6585                         }
6586                         break;
6587                 case 5: /* D5 vblank/vline */
6588                         switch (src_data) {
6589                         case 0: /* D5 vblank */
6590                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6591                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6592
6593                                 if (rdev->irq.crtc_vblank_int[4]) {
6594                                         drm_handle_vblank(rdev->ddev, 4);
6595                                         rdev->pm.vblank_sync = true;
6596                                         wake_up(&rdev->irq.vblank_queue);
6597                                 }
6598                                 if (atomic_read(&rdev->irq.pflip[4]))
6599                                         radeon_crtc_handle_vblank(rdev, 4);
6600                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6601                                 DRM_DEBUG("IH: D5 vblank\n");
6602
6603                                 break;
6604                         case 1: /* D5 vline */
6605                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6606                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6607
6608                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6609                                 DRM_DEBUG("IH: D5 vline\n");
6610
6611                                 break;
6612                         default:
6613                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6614                                 break;
6615                         }
6616                         break;
6617                 case 6: /* D6 vblank/vline */
6618                         switch (src_data) {
6619                         case 0: /* D6 vblank */
6620                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6621                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6622
6623                                 if (rdev->irq.crtc_vblank_int[5]) {
6624                                         drm_handle_vblank(rdev->ddev, 5);
6625                                         rdev->pm.vblank_sync = true;
6626                                         wake_up(&rdev->irq.vblank_queue);
6627                                 }
6628                                 if (atomic_read(&rdev->irq.pflip[5]))
6629                                         radeon_crtc_handle_vblank(rdev, 5);
6630                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6631                                 DRM_DEBUG("IH: D6 vblank\n");
6632
6633                                 break;
6634                         case 1: /* D6 vline */
6635                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6636                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6637
6638                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6639                                 DRM_DEBUG("IH: D6 vline\n");
6640
6641                                 break;
6642                         default:
6643                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6644                                 break;
6645                         }
6646                         break;
6647                 case 8: /* D1 page flip */
6648                 case 10: /* D2 page flip */
6649                 case 12: /* D3 page flip */
6650                 case 14: /* D4 page flip */
6651                 case 16: /* D5 page flip */
6652                 case 18: /* D6 page flip */
6653                         DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6654                         if (radeon_use_pflipirq > 0)
6655                                 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6656                         break;
6657                 case 42: /* HPD hotplug */
6658                         switch (src_data) {
6659                         case 0:
6660                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6661                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6662
6663                                 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6664                                 queue_hotplug = true;
6665                                 DRM_DEBUG("IH: HPD1\n");
6666
6667                                 break;
6668                         case 1:
6669                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6670                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6671
6672                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6673                                 queue_hotplug = true;
6674                                 DRM_DEBUG("IH: HPD2\n");
6675
6676                                 break;
6677                         case 2:
6678                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6679                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6680
6681                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6682                                 queue_hotplug = true;
6683                                 DRM_DEBUG("IH: HPD3\n");
6684
6685                                 break;
6686                         case 3:
6687                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6688                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6689
6690                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6691                                 queue_hotplug = true;
6692                                 DRM_DEBUG("IH: HPD4\n");
6693
6694                                 break;
6695                         case 4:
6696                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6697                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6698
6699                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6700                                 queue_hotplug = true;
6701                                 DRM_DEBUG("IH: HPD5\n");
6702
6703                                 break;
6704                         case 5:
6705                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6706                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6707
6708                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6709                                 queue_hotplug = true;
6710                                 DRM_DEBUG("IH: HPD6\n");
6711
6712                                 break;
6713                         case 6:
6714                                 if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6715                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6716
6717                                 rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6718                                 queue_dp = true;
6719                                 DRM_DEBUG("IH: HPD_RX 1\n");
6720
6721                                 break;
6722                         case 7:
6723                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6724                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6725
6726                                 rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6727                                 queue_dp = true;
6728                                 DRM_DEBUG("IH: HPD_RX 2\n");
6729
6730                                 break;
6731                         case 8:
6732                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6733                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6734
6735                                 rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6736                                 queue_dp = true;
6737                                 DRM_DEBUG("IH: HPD_RX 3\n");
6738
6739                                 break;
6740                         case 9:
6741                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6742                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6743
6744                                 rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6745                                 queue_dp = true;
6746                                 DRM_DEBUG("IH: HPD_RX 4\n");
6747
6748                                 break;
6749                         case 10:
6750                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6751                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6752
6753                                 rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6754                                 queue_dp = true;
6755                                 DRM_DEBUG("IH: HPD_RX 5\n");
6756
6757                                 break;
6758                         case 11:
6759                                 if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6760                                         DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6761
6762                                 rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6763                                 queue_dp = true;
6764                                 DRM_DEBUG("IH: HPD_RX 6\n");
6765
6766                                 break;
6767                         default:
6768                                 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6769                                 break;
6770                         }
6771                         break;
6772                 case 96:
6773                         DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6774                         WREG32(SRBM_INT_ACK, 0x1);
6775                         break;
6776                 case 124: /* UVD */
6777                         DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6778                         radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6779                         break;
6780                 case 146:
6781                 case 147:
6782                         addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6783                         status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6784                         /* reset addr and status */
6785                         WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6786                         if (addr == 0x0 && status == 0x0)
6787                                 break;
6788                         dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6789                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6790                                 addr);
6791                         dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6792                                 status);
6793                         si_vm_decode_fault(rdev, status, addr);
6794                         break;
6795                 case 176: /* RINGID0 CP_INT */
6796                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6797                         break;
6798                 case 177: /* RINGID1 CP_INT */
6799                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6800                         break;
6801                 case 178: /* RINGID2 CP_INT */
6802                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6803                         break;
6804                 case 181: /* CP EOP event */
6805                         DRM_DEBUG("IH: CP EOP\n");
6806                         switch (ring_id) {
6807                         case 0:
6808                                 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6809                                 break;
6810                         case 1:
6811                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6812                                 break;
6813                         case 2:
6814                                 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6815                                 break;
6816                         }
6817                         break;
6818                 case 224: /* DMA trap event */
6819                         DRM_DEBUG("IH: DMA trap\n");
6820                         radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6821                         break;
6822                 case 230: /* thermal low to high */
6823                         DRM_DEBUG("IH: thermal low to high\n");
6824                         rdev->pm.dpm.thermal.high_to_low = false;
6825                         queue_thermal = true;
6826                         break;
6827                 case 231: /* thermal high to low */
6828                         DRM_DEBUG("IH: thermal high to low\n");
6829                         rdev->pm.dpm.thermal.high_to_low = true;
6830                         queue_thermal = true;
6831                         break;
6832                 case 233: /* GUI IDLE */
6833                         DRM_DEBUG("IH: GUI idle\n");
6834                         break;
6835                 case 244: /* DMA trap event */
6836                         DRM_DEBUG("IH: DMA1 trap\n");
6837                         radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6838                         break;
6839                 default:
6840                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6841                         break;
6842                 }
6843
6844                 /* wptr/rptr are in bytes! */
6845                 rptr += 16;
6846                 rptr &= rdev->ih.ptr_mask;
6847                 WREG32(IH_RB_RPTR, rptr);
6848         }
6849         if (queue_dp)
6850                 schedule_work(&rdev->dp_work);
6851         if (queue_hotplug)
6852                 schedule_delayed_work(&rdev->hotplug_work, 0);
6853         if (queue_thermal && rdev->pm.dpm_enabled)
6854                 schedule_work(&rdev->pm.dpm.thermal.work);
6855         rdev->ih.rptr = rptr;
6856         atomic_set(&rdev->ih.lock, 0);
6857
6858         /* make sure wptr hasn't changed while processing */
6859         wptr = si_get_ih_wptr(rdev);
6860         if (wptr != rptr)
6861                 goto restart_ih;
6862
6863         return IRQ_HANDLED;
6864 }
6865
6866 /*
6867  * startup/shutdown callbacks
6868  */
6869 static void si_uvd_init(struct radeon_device *rdev)
6870 {
6871         int r;
6872
6873         if (!rdev->has_uvd)
6874                 return;
6875
6876         r = radeon_uvd_init(rdev);
6877         if (r) {
6878                 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6879                 /*
6880                  * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6881                  * to early fails uvd_v2_2_resume() and thus nothing happens
6882                  * there. So it is pointless to try to go through that code
6883                  * hence why we disable uvd here.
6884                  */
6885                 rdev->has_uvd = 0;
6886                 return;
6887         }
6888         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6889         r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6890 }
6891
6892 static void si_uvd_start(struct radeon_device *rdev)
6893 {
6894         int r;
6895
6896         if (!rdev->has_uvd)
6897                 return;
6898
6899         r = uvd_v2_2_resume(rdev);
6900         if (r) {
6901                 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6902                 goto error;
6903         }
6904         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6905         if (r) {
6906                 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6907                 goto error;
6908         }
6909         return;
6910
6911 error:
6912         rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6913 }
6914
6915 static void si_uvd_resume(struct radeon_device *rdev)
6916 {
6917         struct radeon_ring *ring;
6918         int r;
6919
6920         if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6921                 return;
6922
6923         ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6924         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6925         if (r) {
6926                 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6927                 return;
6928         }
6929         r = uvd_v1_0_init(rdev);
6930         if (r) {
6931                 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6932                 return;
6933         }
6934 }
6935
6936 static void si_vce_init(struct radeon_device *rdev)
6937 {
6938         int r;
6939
6940         if (!rdev->has_vce)
6941                 return;
6942
6943         r = radeon_vce_init(rdev);
6944         if (r) {
6945                 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6946                 /*
6947                  * At this point rdev->vce.vcpu_bo is NULL which trickles down
6948                  * to early fails si_vce_start() and thus nothing happens
6949                  * there. So it is pointless to try to go through that code
6950                  * hence why we disable vce here.
6951                  */
6952                 rdev->has_vce = 0;
6953                 return;
6954         }
6955         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6956         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6957         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6958         r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6959 }
6960
6961 static void si_vce_start(struct radeon_device *rdev)
6962 {
6963         int r;
6964
6965         if (!rdev->has_vce)
6966                 return;
6967
6968         r = radeon_vce_resume(rdev);
6969         if (r) {
6970                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6971                 goto error;
6972         }
6973         r = vce_v1_0_resume(rdev);
6974         if (r) {
6975                 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6976                 goto error;
6977         }
6978         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6979         if (r) {
6980                 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6981                 goto error;
6982         }
6983         r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6984         if (r) {
6985                 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6986                 goto error;
6987         }
6988         return;
6989
6990 error:
6991         rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6992         rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6993 }
6994
6995 static void si_vce_resume(struct radeon_device *rdev)
6996 {
6997         struct radeon_ring *ring;
6998         int r;
6999
7000         if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
7001                 return;
7002
7003         ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
7004         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7005         if (r) {
7006                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7007                 return;
7008         }
7009         ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
7010         r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
7011         if (r) {
7012                 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
7013                 return;
7014         }
7015         r = vce_v1_0_init(rdev);
7016         if (r) {
7017                 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7018                 return;
7019         }
7020 }
7021
7022 static int si_startup(struct radeon_device *rdev)
7023 {
7024         struct radeon_ring *ring;
7025         int r;
7026
7027         /* enable pcie gen2/3 link */
7028         si_pcie_gen3_enable(rdev);
7029         /* enable aspm */
7030         si_program_aspm(rdev);
7031
7032         /* scratch needs to be initialized before MC */
7033         r = r600_vram_scratch_init(rdev);
7034         if (r)
7035                 return r;
7036
7037         si_mc_program(rdev);
7038
7039         if (!rdev->pm.dpm_enabled) {
7040                 r = si_mc_load_microcode(rdev);
7041                 if (r) {
7042                         DRM_ERROR("Failed to load MC firmware!\n");
7043                         return r;
7044                 }
7045         }
7046
7047         r = si_pcie_gart_enable(rdev);
7048         if (r)
7049                 return r;
7050         si_gpu_init(rdev);
7051
7052         /* allocate rlc buffers */
7053         if (rdev->family == CHIP_VERDE) {
7054                 rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7055                 rdev->rlc.reg_list_size =
7056                         (u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7057         }
7058         rdev->rlc.cs_data = si_cs_data;
7059         r = sumo_rlc_init(rdev);
7060         if (r) {
7061                 DRM_ERROR("Failed to init rlc BOs!\n");
7062                 return r;
7063         }
7064
7065         /* allocate wb buffer */
7066         r = radeon_wb_init(rdev);
7067         if (r)
7068                 return r;
7069
7070         r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7071         if (r) {
7072                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7073                 return r;
7074         }
7075
7076         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7077         if (r) {
7078                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7079                 return r;
7080         }
7081
7082         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7083         if (r) {
7084                 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7085                 return r;
7086         }
7087
7088         r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7089         if (r) {
7090                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7091                 return r;
7092         }
7093
7094         r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7095         if (r) {
7096                 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7097                 return r;
7098         }
7099
7100         si_uvd_start(rdev);
7101         si_vce_start(rdev);
7102
7103         /* Enable IRQ */
7104         if (!rdev->irq.installed) {
7105                 r = radeon_irq_kms_init(rdev);
7106                 if (r)
7107                         return r;
7108         }
7109
7110         r = si_irq_init(rdev);
7111         if (r) {
7112                 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7113                 radeon_irq_kms_fini(rdev);
7114                 return r;
7115         }
7116         si_irq_set(rdev);
7117
7118         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7119         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7120                              RADEON_CP_PACKET2);
7121         if (r)
7122                 return r;
7123
7124         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7125         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7126                              RADEON_CP_PACKET2);
7127         if (r)
7128                 return r;
7129
7130         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7131         r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7132                              RADEON_CP_PACKET2);
7133         if (r)
7134                 return r;
7135
7136         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7137         r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7138                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7139         if (r)
7140                 return r;
7141
7142         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7143         r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7144                              DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7145         if (r)
7146                 return r;
7147
7148         r = si_cp_load_microcode(rdev);
7149         if (r)
7150                 return r;
7151         r = si_cp_resume(rdev);
7152         if (r)
7153                 return r;
7154
7155         r = cayman_dma_resume(rdev);
7156         if (r)
7157                 return r;
7158
7159         si_uvd_resume(rdev);
7160         si_vce_resume(rdev);
7161
7162         r = radeon_ib_pool_init(rdev);
7163         if (r) {
7164                 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7165                 return r;
7166         }
7167
7168         r = radeon_vm_manager_init(rdev);
7169         if (r) {
7170                 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7171                 return r;
7172         }
7173
7174         r = radeon_audio_init(rdev);
7175         if (r)
7176                 return r;
7177
7178         return 0;
7179 }
7180
7181 int si_resume(struct radeon_device *rdev)
7182 {
7183         int r;
7184
7185         /* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
7186          * posting will perform necessary task to bring back GPU into good
7187          * shape.
7188          */
7189         /* post card */
7190         atom_asic_init(rdev->mode_info.atom_context);
7191
7192         /* init golden registers */
7193         si_init_golden_registers(rdev);
7194
7195         if (rdev->pm.pm_method == PM_METHOD_DPM)
7196                 radeon_pm_resume(rdev);
7197
7198         rdev->accel_working = true;
7199         r = si_startup(rdev);
7200         if (r) {
7201                 DRM_ERROR("si startup failed on resume\n");
7202                 rdev->accel_working = false;
7203                 return r;
7204         }
7205
7206         return r;
7207
7208 }
7209
7210 int si_suspend(struct radeon_device *rdev)
7211 {
7212         radeon_pm_suspend(rdev);
7213         radeon_audio_fini(rdev);
7214         radeon_vm_manager_fini(rdev);
7215         si_cp_enable(rdev, false);
7216         cayman_dma_stop(rdev);
7217         if (rdev->has_uvd) {
7218                 uvd_v1_0_fini(rdev);
7219                 radeon_uvd_suspend(rdev);
7220         }
7221         if (rdev->has_vce)
7222                 radeon_vce_suspend(rdev);
7223         si_fini_pg(rdev);
7224         si_fini_cg(rdev);
7225         si_irq_suspend(rdev);
7226         radeon_wb_disable(rdev);
7227         si_pcie_gart_disable(rdev);
7228         return 0;
7229 }
7230
7231 /* Plan is to move initialization in that function and use
7232  * helper function so that radeon_device_init pretty much
7233  * do nothing more than calling asic specific function. This
7234  * should also allow to remove a bunch of callback function
7235  * like vram_info.
7236  */
7237 int si_init(struct radeon_device *rdev)
7238 {
7239         struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7240         int r;
7241
7242         /* Read BIOS */
7243         if (!radeon_get_bios(rdev)) {
7244                 if (ASIC_IS_AVIVO(rdev))
7245                         return -EINVAL;
7246         }
7247         /* Must be an ATOMBIOS */
7248         if (!rdev->is_atom_bios) {
7249                 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7250                 return -EINVAL;
7251         }
7252         r = radeon_atombios_init(rdev);
7253         if (r)
7254                 return r;
7255
7256         /* Post card if necessary */
7257         if (!radeon_card_posted(rdev)) {
7258                 if (!rdev->bios) {
7259                         dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7260                         return -EINVAL;
7261                 }
7262                 DRM_INFO("GPU not posted. posting now...\n");
7263                 atom_asic_init(rdev->mode_info.atom_context);
7264         }
7265         /* init golden registers */
7266         si_init_golden_registers(rdev);
7267         /* Initialize scratch registers */
7268         si_scratch_init(rdev);
7269         /* Initialize surface registers */
7270         radeon_surface_init(rdev);
7271         /* Initialize clocks */
7272         radeon_get_clock_info(rdev->ddev);
7273
7274         /* Fence driver */
7275         r = radeon_fence_driver_init(rdev);
7276         if (r)
7277                 return r;
7278
7279         /* initialize memory controller */
7280         r = si_mc_init(rdev);
7281         if (r)
7282                 return r;
7283         /* Memory manager */
7284         r = radeon_bo_init(rdev);
7285         if (r)
7286                 return r;
7287
7288         if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7289             !rdev->rlc_fw || !rdev->mc_fw) {
7290                 r = si_init_microcode(rdev);
7291                 if (r) {
7292                         DRM_ERROR("Failed to load firmware!\n");
7293                         return r;
7294                 }
7295         }
7296
7297         /* Initialize power management */
7298         radeon_pm_init(rdev);
7299
7300         ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7301         ring->ring_obj = NULL;
7302         r600_ring_init(rdev, ring, 1024 * 1024);
7303
7304         ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7305         ring->ring_obj = NULL;
7306         r600_ring_init(rdev, ring, 1024 * 1024);
7307
7308         ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7309         ring->ring_obj = NULL;
7310         r600_ring_init(rdev, ring, 1024 * 1024);
7311
7312         ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7313         ring->ring_obj = NULL;
7314         r600_ring_init(rdev, ring, 64 * 1024);
7315
7316         ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7317         ring->ring_obj = NULL;
7318         r600_ring_init(rdev, ring, 64 * 1024);
7319
7320         si_uvd_init(rdev);
7321         si_vce_init(rdev);
7322
7323         rdev->ih.ring_obj = NULL;
7324         r600_ih_ring_init(rdev, 64 * 1024);
7325
7326         r = r600_pcie_gart_init(rdev);
7327         if (r)
7328                 return r;
7329
7330         rdev->accel_working = true;
7331         r = si_startup(rdev);
7332         if (r) {
7333                 dev_err(rdev->dev, "disabling GPU acceleration\n");
7334                 si_cp_fini(rdev);
7335                 cayman_dma_fini(rdev);
7336                 si_irq_fini(rdev);
7337                 sumo_rlc_fini(rdev);
7338                 radeon_wb_fini(rdev);
7339                 radeon_ib_pool_fini(rdev);
7340                 radeon_vm_manager_fini(rdev);
7341                 radeon_irq_kms_fini(rdev);
7342                 si_pcie_gart_fini(rdev);
7343                 rdev->accel_working = false;
7344         }
7345
7346         /* Don't start up if the MC ucode is missing.
7347          * The default clocks and voltages before the MC ucode
7348          * is loaded are not suffient for advanced operations.
7349          */
7350         if (!rdev->mc_fw) {
7351                 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7352                 return -EINVAL;
7353         }
7354
7355         return 0;
7356 }
7357
7358 void si_fini(struct radeon_device *rdev)
7359 {
7360         radeon_pm_fini(rdev);
7361         si_cp_fini(rdev);
7362         cayman_dma_fini(rdev);
7363         si_fini_pg(rdev);
7364         si_fini_cg(rdev);
7365         si_irq_fini(rdev);
7366         sumo_rlc_fini(rdev);
7367         radeon_wb_fini(rdev);
7368         radeon_vm_manager_fini(rdev);
7369         radeon_ib_pool_fini(rdev);
7370         radeon_irq_kms_fini(rdev);
7371         if (rdev->has_uvd) {
7372                 uvd_v1_0_fini(rdev);
7373                 radeon_uvd_fini(rdev);
7374         }
7375         if (rdev->has_vce)
7376                 radeon_vce_fini(rdev);
7377         si_pcie_gart_fini(rdev);
7378         r600_vram_scratch_fini(rdev);
7379         radeon_gem_fini(rdev);
7380         radeon_fence_driver_fini(rdev);
7381         radeon_bo_fini(rdev);
7382         radeon_atombios_fini(rdev);
7383         kfree(rdev->bios);
7384         rdev->bios = NULL;
7385 }
7386
7387 /**
7388  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7389  *
7390  * @rdev: radeon_device pointer
7391  *
7392  * Fetches a GPU clock counter snapshot (SI).
7393  * Returns the 64 bit clock counter snapshot.
7394  */
7395 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7396 {
7397         uint64_t clock;
7398
7399         mutex_lock(&rdev->gpu_clock_mutex);
7400         WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7401         clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7402                 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7403         mutex_unlock(&rdev->gpu_clock_mutex);
7404         return clock;
7405 }
7406
7407 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7408 {
7409         unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7410         int r;
7411
7412         /* bypass vclk and dclk with bclk */
7413         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7414                 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7415                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7416
7417         /* put PLL in bypass mode */
7418         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7419
7420         if (!vclk || !dclk) {
7421                 /* keep the Bypass mode */
7422                 return 0;
7423         }
7424
7425         r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7426                                           16384, 0x03FFFFFF, 0, 128, 5,
7427                                           &fb_div, &vclk_div, &dclk_div);
7428         if (r)
7429                 return r;
7430
7431         /* set RESET_ANTI_MUX to 0 */
7432         WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7433
7434         /* set VCO_MODE to 1 */
7435         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7436
7437         /* disable sleep mode */
7438         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7439
7440         /* deassert UPLL_RESET */
7441         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7442
7443         mdelay(1);
7444
7445         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7446         if (r)
7447                 return r;
7448
7449         /* assert UPLL_RESET again */
7450         WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7451
7452         /* disable spread spectrum. */
7453         WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7454
7455         /* set feedback divider */
7456         WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7457
7458         /* set ref divider to 0 */
7459         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7460
7461         if (fb_div < 307200)
7462                 WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7463         else
7464                 WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7465
7466         /* set PDIV_A and PDIV_B */
7467         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7468                 UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7469                 ~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7470
7471         /* give the PLL some time to settle */
7472         mdelay(15);
7473
7474         /* deassert PLL_RESET */
7475         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7476
7477         mdelay(15);
7478
7479         /* switch from bypass mode to normal mode */
7480         WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7481
7482         r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7483         if (r)
7484                 return r;
7485
7486         /* switch VCLK and DCLK selection */
7487         WREG32_P(CG_UPLL_FUNC_CNTL_2,
7488                 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7489                 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7490
7491         mdelay(100);
7492
7493         return 0;
7494 }
7495
7496 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7497 {
7498         struct pci_dev *root = rdev->pdev->bus->self;
7499         int bridge_pos, gpu_pos;
7500         u32 speed_cntl, mask, current_data_rate;
7501         int ret, i;
7502         u16 tmp16;
7503
7504         if (pci_is_root_bus(rdev->pdev->bus))
7505                 return;
7506
7507         if (radeon_pcie_gen2 == 0)
7508                 return;
7509
7510         if (rdev->flags & RADEON_IS_IGP)
7511                 return;
7512
7513         if (!(rdev->flags & RADEON_IS_PCIE))
7514                 return;
7515
7516         ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7517         if (ret != 0)
7518                 return;
7519
7520         if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7521                 return;
7522
7523         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7524         current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7525                 LC_CURRENT_DATA_RATE_SHIFT;
7526         if (mask & DRM_PCIE_SPEED_80) {
7527                 if (current_data_rate == 2) {
7528                         DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7529                         return;
7530                 }
7531                 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7532         } else if (mask & DRM_PCIE_SPEED_50) {
7533                 if (current_data_rate == 1) {
7534                         DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7535                         return;
7536                 }
7537                 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7538         }
7539
7540         bridge_pos = pci_pcie_cap(root);
7541         if (!bridge_pos)
7542                 return;
7543
7544         gpu_pos = pci_pcie_cap(rdev->pdev);
7545         if (!gpu_pos)
7546                 return;
7547
7548         if (mask & DRM_PCIE_SPEED_80) {
7549                 /* re-try equalization if gen3 is not already enabled */
7550                 if (current_data_rate != 2) {
7551                         u16 bridge_cfg, gpu_cfg;
7552                         u16 bridge_cfg2, gpu_cfg2;
7553                         u32 max_lw, current_lw, tmp;
7554
7555                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7556                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7557
7558                         tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7559                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7560
7561                         tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7562                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7563
7564                         tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7565                         max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7566                         current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7567
7568                         if (current_lw < max_lw) {
7569                                 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7570                                 if (tmp & LC_RENEGOTIATION_SUPPORT) {
7571                                         tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7572                                         tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7573                                         tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7574                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7575                                 }
7576                         }
7577
7578                         for (i = 0; i < 10; i++) {
7579                                 /* check status */
7580                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7581                                 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7582                                         break;
7583
7584                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7585                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7586
7587                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7588                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7589
7590                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7591                                 tmp |= LC_SET_QUIESCE;
7592                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7593
7594                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7595                                 tmp |= LC_REDO_EQ;
7596                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7597
7598                                 mdelay(100);
7599
7600                                 /* linkctl */
7601                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7602                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7603                                 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7604                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7605
7606                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7607                                 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7608                                 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7609                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7610
7611                                 /* linkctl2 */
7612                                 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7613                                 tmp16 &= ~((1 << 4) | (7 << 9));
7614                                 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7615                                 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7616
7617                                 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7618                                 tmp16 &= ~((1 << 4) | (7 << 9));
7619                                 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7620                                 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7621
7622                                 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7623                                 tmp &= ~LC_SET_QUIESCE;
7624                                 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7625                         }
7626                 }
7627         }
7628
7629         /* set the link speed */
7630         speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7631         speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7632         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7633
7634         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7635         tmp16 &= ~0xf;
7636         if (mask & DRM_PCIE_SPEED_80)
7637                 tmp16 |= 3; /* gen3 */
7638         else if (mask & DRM_PCIE_SPEED_50)
7639                 tmp16 |= 2; /* gen2 */
7640         else
7641                 tmp16 |= 1; /* gen1 */
7642         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7643
7644         speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7645         speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7646         WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7647
7648         for (i = 0; i < rdev->usec_timeout; i++) {
7649                 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7650                 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7651                         break;
7652                 udelay(1);
7653         }
7654 }
7655
7656 static void si_program_aspm(struct radeon_device *rdev)
7657 {
7658         u32 data, orig;
7659         bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7660         bool disable_clkreq = false;
7661
7662         if (radeon_aspm == 0)
7663                 return;
7664
7665         if (!(rdev->flags & RADEON_IS_PCIE))
7666                 return;
7667
7668         orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7669         data &= ~LC_XMIT_N_FTS_MASK;
7670         data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7671         if (orig != data)
7672                 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7673
7674         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7675         data |= LC_GO_TO_RECOVERY;
7676         if (orig != data)
7677                 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7678
7679         orig = data = RREG32_PCIE(PCIE_P_CNTL);
7680         data |= P_IGNORE_EDB_ERR;
7681         if (orig != data)
7682                 WREG32_PCIE(PCIE_P_CNTL, data);
7683
7684         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7685         data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7686         data |= LC_PMI_TO_L1_DIS;
7687         if (!disable_l0s)
7688                 data |= LC_L0S_INACTIVITY(7);
7689
7690         if (!disable_l1) {
7691                 data |= LC_L1_INACTIVITY(7);
7692                 data &= ~LC_PMI_TO_L1_DIS;
7693                 if (orig != data)
7694                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7695
7696                 if (!disable_plloff_in_l1) {
7697                         bool clk_req_support;
7698
7699                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7700                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7701                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7702                         if (orig != data)
7703                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7704
7705                         orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7706                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7707                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7708                         if (orig != data)
7709                                 WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7710
7711                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7712                         data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7713                         data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7714                         if (orig != data)
7715                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7716
7717                         orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7718                         data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7719                         data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7720                         if (orig != data)
7721                                 WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7722
7723                         if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7724                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7725                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7726                                 if (orig != data)
7727                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7728
7729                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7730                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7731                                 if (orig != data)
7732                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7733
7734                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7735                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7736                                 if (orig != data)
7737                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7738
7739                                 orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7740                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7741                                 if (orig != data)
7742                                         WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7743
7744                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7745                                 data &= ~PLL_RAMP_UP_TIME_0_MASK;
7746                                 if (orig != data)
7747                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7748
7749                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7750                                 data &= ~PLL_RAMP_UP_TIME_1_MASK;
7751                                 if (orig != data)
7752                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7753
7754                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7755                                 data &= ~PLL_RAMP_UP_TIME_2_MASK;
7756                                 if (orig != data)
7757                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7758
7759                                 orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7760                                 data &= ~PLL_RAMP_UP_TIME_3_MASK;
7761                                 if (orig != data)
7762                                         WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7763                         }
7764                         orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7765                         data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7766                         data |= LC_DYN_LANES_PWR_STATE(3);
7767                         if (orig != data)
7768                                 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7769
7770                         orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7771                         data &= ~LS2_EXIT_TIME_MASK;
7772                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7773                                 data |= LS2_EXIT_TIME(5);
7774                         if (orig != data)
7775                                 WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7776
7777                         orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7778                         data &= ~LS2_EXIT_TIME_MASK;
7779                         if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7780                                 data |= LS2_EXIT_TIME(5);
7781                         if (orig != data)
7782                                 WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7783
7784                         if (!disable_clkreq &&
7785                             !pci_is_root_bus(rdev->pdev->bus)) {
7786                                 struct pci_dev *root = rdev->pdev->bus->self;
7787                                 u32 lnkcap;
7788
7789                                 clk_req_support = false;
7790                                 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7791                                 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7792                                         clk_req_support = true;
7793                         } else {
7794                                 clk_req_support = false;
7795                         }
7796
7797                         if (clk_req_support) {
7798                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7799                                 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7800                                 if (orig != data)
7801                                         WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7802
7803                                 orig = data = RREG32(THM_CLK_CNTL);
7804                                 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7805                                 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7806                                 if (orig != data)
7807                                         WREG32(THM_CLK_CNTL, data);
7808
7809                                 orig = data = RREG32(MISC_CLK_CNTL);
7810                                 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7811                                 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7812                                 if (orig != data)
7813                                         WREG32(MISC_CLK_CNTL, data);
7814
7815                                 orig = data = RREG32(CG_CLKPIN_CNTL);
7816                                 data &= ~BCLK_AS_XCLK;
7817                                 if (orig != data)
7818                                         WREG32(CG_CLKPIN_CNTL, data);
7819
7820                                 orig = data = RREG32(CG_CLKPIN_CNTL_2);
7821                                 data &= ~FORCE_BIF_REFCLK_EN;
7822                                 if (orig != data)
7823                                         WREG32(CG_CLKPIN_CNTL_2, data);
7824
7825                                 orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7826                                 data &= ~MPLL_CLKOUT_SEL_MASK;
7827                                 data |= MPLL_CLKOUT_SEL(4);
7828                                 if (orig != data)
7829                                         WREG32(MPLL_BYPASSCLK_SEL, data);
7830
7831                                 orig = data = RREG32(SPLL_CNTL_MODE);
7832                                 data &= ~SPLL_REFCLK_SEL_MASK;
7833                                 if (orig != data)
7834                                         WREG32(SPLL_CNTL_MODE, data);
7835                         }
7836                 }
7837         } else {
7838                 if (orig != data)
7839                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7840         }
7841
7842         orig = data = RREG32_PCIE(PCIE_CNTL2);
7843         data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7844         if (orig != data)
7845                 WREG32_PCIE(PCIE_CNTL2, data);
7846
7847         if (!disable_l0s) {
7848                 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7849                 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7850                         data = RREG32_PCIE(PCIE_LC_STATUS1);
7851                         if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7852                                 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7853                                 data &= ~LC_L0S_INACTIVITY_MASK;
7854                                 if (orig != data)
7855                                         WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7856                         }
7857                 }
7858         }
7859 }
7860
7861 int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7862 {
7863         unsigned i;
7864
7865         /* make sure VCEPLL_CTLREQ is deasserted */
7866         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7867
7868         mdelay(10);
7869
7870         /* assert UPLL_CTLREQ */
7871         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7872
7873         /* wait for CTLACK and CTLACK2 to get asserted */
7874         for (i = 0; i < 100; ++i) {
7875                 uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7876                 if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7877                         break;
7878                 mdelay(10);
7879         }
7880
7881         /* deassert UPLL_CTLREQ */
7882         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7883
7884         if (i == 100) {
7885                 DRM_ERROR("Timeout setting UVD clocks!\n");
7886                 return -ETIMEDOUT;
7887         }
7888
7889         return 0;
7890 }
7891
7892 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7893 {
7894         unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7895         int r;
7896
7897         /* bypass evclk and ecclk with bclk */
7898         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7899                      EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7900                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7901
7902         /* put PLL in bypass mode */
7903         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7904                      ~VCEPLL_BYPASS_EN_MASK);
7905
7906         if (!evclk || !ecclk) {
7907                 /* keep the Bypass mode, put PLL to sleep */
7908                 WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7909                              ~VCEPLL_SLEEP_MASK);
7910                 return 0;
7911         }
7912
7913         r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7914                                           16384, 0x03FFFFFF, 0, 128, 5,
7915                                           &fb_div, &evclk_div, &ecclk_div);
7916         if (r)
7917                 return r;
7918
7919         /* set RESET_ANTI_MUX to 0 */
7920         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7921
7922         /* set VCO_MODE to 1 */
7923         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7924                      ~VCEPLL_VCO_MODE_MASK);
7925
7926         /* toggle VCEPLL_SLEEP to 1 then back to 0 */
7927         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7928                      ~VCEPLL_SLEEP_MASK);
7929         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7930
7931         /* deassert VCEPLL_RESET */
7932         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7933
7934         mdelay(1);
7935
7936         r = si_vce_send_vcepll_ctlreq(rdev);
7937         if (r)
7938                 return r;
7939
7940         /* assert VCEPLL_RESET again */
7941         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7942
7943         /* disable spread spectrum. */
7944         WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7945
7946         /* set feedback divider */
7947         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7948
7949         /* set ref divider to 0 */
7950         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7951
7952         /* set PDIV_A and PDIV_B */
7953         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7954                      VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7955                      ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7956
7957         /* give the PLL some time to settle */
7958         mdelay(15);
7959
7960         /* deassert PLL_RESET */
7961         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7962
7963         mdelay(15);
7964
7965         /* switch from bypass mode to normal mode */
7966         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7967
7968         r = si_vce_send_vcepll_ctlreq(rdev);
7969         if (r)
7970                 return r;
7971
7972         /* switch VCLK and DCLK selection */
7973         WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7974                      EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7975                      ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7976
7977         mdelay(100);
7978
7979         return 0;
7980 }