drivers/gpu/drm/i915/intel_pm.c
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eugeni Dodonov <eugeni.dodonov@intel.com>
 *
 */

#include <linux/cpufreq.h>
#include "i915_drv.h"
#include "intel_drv.h"
#include "../../../platform/x86/intel_ips.h"
#include <linux/module.h>

/**
 * RC6 is a special power stage which allows the GPU to enter a very
 * low-voltage mode when idle, using down to 0V while at this stage.  This
 * stage is entered automatically when the GPU is idle and RC6 support is
 * enabled, and as soon as a new workload arises the GPU wakes up
 * automatically as well.
 *
 * There are different RC6 modes available on Intel GPUs, which differ in the
 * latency required to enter and leave RC6 and in the voltage consumed by the
 * GPU in the different states.
 *
 * The combination of the following flags defines which states the GPU is
 * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
 * RC6pp is the deepest RC6. Their support by hardware varies according to the
 * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
 * which brings the most power savings; deeper states save more power, but
 * require higher latency to switch to and wake up from.
 */
#define INTEL_RC6_ENABLE                        (1<<0)
#define INTEL_RC6p_ENABLE                       (1<<1)
#define INTEL_RC6pp_ENABLE                      (1<<2)

static void gen9_init_clock_gating(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /*
         * WaDisableSDEUnitClockGating:skl
         * This seems to be a pre-production w/a.
         */
        I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
                   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

        /*
         * WaDisableDgMirrorFixInHalfSliceChicken5:skl
         * This is a pre-production w/a.
         */
        I915_WRITE(GEN9_HALF_SLICE_CHICKEN5,
                   I915_READ(GEN9_HALF_SLICE_CHICKEN5) &
                   ~GEN9_DG_MIRROR_FIX_ENABLE);

        /* Wa4x4STCOptimizationDisable:skl */
        I915_WRITE(CACHE_MODE_1,
                   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
}

static void i915_pineview_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 tmp;

        tmp = I915_READ(CLKCFG);

        switch (tmp & CLKCFG_FSB_MASK) {
        case CLKCFG_FSB_533:
                dev_priv->fsb_freq = 533; /* 133*4 */
                break;
        case CLKCFG_FSB_800:
                dev_priv->fsb_freq = 800; /* 200*4 */
                break;
        case CLKCFG_FSB_667:
                dev_priv->fsb_freq = 667; /* 167*4 */
                break;
        case CLKCFG_FSB_400:
                dev_priv->fsb_freq = 400; /* 100*4 */
                break;
        }

        switch (tmp & CLKCFG_MEM_MASK) {
        case CLKCFG_MEM_533:
                dev_priv->mem_freq = 533;
                break;
        case CLKCFG_MEM_667:
                dev_priv->mem_freq = 667;
                break;
        case CLKCFG_MEM_800:
                dev_priv->mem_freq = 800;
                break;
        }

        /* detect pineview DDR3 setting */
        tmp = I915_READ(CSHRDDR3CTL);
        dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0;
}

static void i915_ironlake_get_mem_freq(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 ddrpll, csipll;

        ddrpll = I915_READ16(DDRMPLL1);
        csipll = I915_READ16(CSIPLL0);

        switch (ddrpll & 0xff) {
        case 0xc:
                dev_priv->mem_freq = 800;
                break;
        case 0x10:
                dev_priv->mem_freq = 1066;
                break;
        case 0x14:
                dev_priv->mem_freq = 1333;
                break;
        case 0x18:
                dev_priv->mem_freq = 1600;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown memory frequency 0x%02x\n",
                                 ddrpll & 0xff);
                dev_priv->mem_freq = 0;
                break;
        }

        dev_priv->ips.r_t = dev_priv->mem_freq;

        switch (csipll & 0x3ff) {
        case 0x00c:
                dev_priv->fsb_freq = 3200;
                break;
        case 0x00e:
                dev_priv->fsb_freq = 3733;
                break;
        case 0x010:
                dev_priv->fsb_freq = 4266;
                break;
        case 0x012:
                dev_priv->fsb_freq = 4800;
                break;
        case 0x014:
                dev_priv->fsb_freq = 5333;
                break;
        case 0x016:
                dev_priv->fsb_freq = 5866;
                break;
        case 0x018:
                dev_priv->fsb_freq = 6400;
                break;
        default:
                DRM_DEBUG_DRIVER("unknown fsb frequency 0x%04x\n",
                                 csipll & 0x3ff);
                dev_priv->fsb_freq = 0;
                break;
        }

        if (dev_priv->fsb_freq == 3200) {
                dev_priv->ips.c_m = 0;
        } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
                dev_priv->ips.c_m = 1;
        } else {
                dev_priv->ips.c_m = 2;
        }
}

static const struct cxsr_latency cxsr_latency_table[] = {
        {1, 0, 800, 400, 3382, 33382, 3983, 33983},    /* DDR2-400 SC */
        {1, 0, 800, 667, 3354, 33354, 3807, 33807},    /* DDR2-667 SC */
        {1, 0, 800, 800, 3347, 33347, 3763, 33763},    /* DDR2-800 SC */
        {1, 1, 800, 667, 6420, 36420, 6873, 36873},    /* DDR3-667 SC */
        {1, 1, 800, 800, 5902, 35902, 6318, 36318},    /* DDR3-800 SC */

        {1, 0, 667, 400, 3400, 33400, 4021, 34021},    /* DDR2-400 SC */
        {1, 0, 667, 667, 3372, 33372, 3845, 33845},    /* DDR2-667 SC */
        {1, 0, 667, 800, 3386, 33386, 3822, 33822},    /* DDR2-800 SC */
        {1, 1, 667, 667, 6438, 36438, 6911, 36911},    /* DDR3-667 SC */
        {1, 1, 667, 800, 5941, 35941, 6377, 36377},    /* DDR3-800 SC */

        {1, 0, 400, 400, 3472, 33472, 4173, 34173},    /* DDR2-400 SC */
        {1, 0, 400, 667, 3443, 33443, 3996, 33996},    /* DDR2-667 SC */
        {1, 0, 400, 800, 3430, 33430, 3946, 33946},    /* DDR2-800 SC */
        {1, 1, 400, 667, 6509, 36509, 7062, 37062},    /* DDR3-667 SC */
        {1, 1, 400, 800, 5985, 35985, 6501, 36501},    /* DDR3-800 SC */

        {0, 0, 800, 400, 3438, 33438, 4065, 34065},    /* DDR2-400 SC */
        {0, 0, 800, 667, 3410, 33410, 3889, 33889},    /* DDR2-667 SC */
        {0, 0, 800, 800, 3403, 33403, 3845, 33845},    /* DDR2-800 SC */
        {0, 1, 800, 667, 6476, 36476, 6955, 36955},    /* DDR3-667 SC */
        {0, 1, 800, 800, 5958, 35958, 6400, 36400},    /* DDR3-800 SC */

        {0, 0, 667, 400, 3456, 33456, 4103, 34106},    /* DDR2-400 SC */
        {0, 0, 667, 667, 3428, 33428, 3927, 33927},    /* DDR2-667 SC */
        {0, 0, 667, 800, 3443, 33443, 3905, 33905},    /* DDR2-800 SC */
        {0, 1, 667, 667, 6494, 36494, 6993, 36993},    /* DDR3-667 SC */
        {0, 1, 667, 800, 5998, 35998, 6460, 36460},    /* DDR3-800 SC */

        {0, 0, 400, 400, 3528, 33528, 4255, 34255},    /* DDR2-400 SC */
        {0, 0, 400, 667, 3500, 33500, 4079, 34079},    /* DDR2-667 SC */
        {0, 0, 400, 800, 3487, 33487, 4029, 34029},    /* DDR2-800 SC */
        {0, 1, 400, 667, 6566, 36566, 7145, 37145},    /* DDR3-667 SC */
        {0, 1, 400, 800, 6042, 36042, 6584, 36584},    /* DDR3-800 SC */
};

static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop,
                                                         int is_ddr3,
                                                         int fsb,
                                                         int mem)
{
        const struct cxsr_latency *latency;
        int i;

        if (fsb == 0 || mem == 0)
                return NULL;

        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
                    is_ddr3 == latency->is_ddr3 &&
                    fsb == latency->fsb_freq && mem == latency->mem_freq)
                        return latency;
        }

        DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");

        return NULL;
}

void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable)
{
        struct drm_device *dev = dev_priv->dev;
        u32 val;

        if (IS_VALLEYVIEW(dev)) {
                I915_WRITE(FW_BLC_SELF_VLV, enable ? FW_CSPWRDWNEN : 0);
        } else if (IS_G4X(dev) || IS_CRESTLINE(dev)) {
                I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0);
        } else if (IS_PINEVIEW(dev)) {
                val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN;
                val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0;
                I915_WRITE(DSPFW3, val);
        } else if (IS_I945G(dev) || IS_I945GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(FW_BLC_SELF_EN) :
                               _MASKED_BIT_DISABLE(FW_BLC_SELF_EN);
                I915_WRITE(FW_BLC_SELF, val);
        } else if (IS_I915GM(dev)) {
                val = enable ? _MASKED_BIT_ENABLE(INSTPM_SELF_EN) :
                               _MASKED_BIT_DISABLE(INSTPM_SELF_EN);
                I915_WRITE(INSTPM, val);
        } else {
                return;
        }

        DRM_DEBUG_KMS("memory self-refresh is %s\n",
                      enable ? "enabled" : "disabled");
}

/*
 * Latency for FIFO fetches is dependent on several factors:
 *   - memory configuration (speed, channels)
 *   - chipset
 *   - current MCH state
 * It can be fairly high in some situations, so here we assume a fairly
 * pessimal value.  It's a tradeoff between extra memory fetches (if we
 * set this value too high, the FIFO will fetch frequently to stay full)
 * and power consumption (set it too low to save power and we might see
 * FIFO underruns and display "flicker").
 *
 * A value of 5us seems to be a good balance; safe for very low end
 * platforms but not overly aggressive on lower latency configs.
 */
static const int pessimal_latency_ns = 5000;

static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        if (plane)
                size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) - size;

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i830_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x1ff;
        if (plane)
                size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) - size;
        size >>= 1; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A", size);

        return size;
}

static int i845_get_fifo_size(struct drm_device *dev, int plane)
{
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;

        size = dsparb & 0x7f;
        size >>= 2; /* Convert to cachelines */

        DRM_DEBUG_KMS("FIFO size - (0x%08x) %s: %d\n", dsparb,
                      plane ? "B" : "A",
                      size);

        return size;
}

/* Pineview has different values for various configs */
static const struct intel_watermark_params pineview_display_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_display_hplloff_wm = {
        .fifo_size = PINEVIEW_DISPLAY_FIFO,
        .max_wm = PINEVIEW_MAX_WM,
        .default_wm = PINEVIEW_DFT_HPLLOFF_WM,
        .guard_size = PINEVIEW_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params pineview_cursor_hplloff_wm = {
        .fifo_size = PINEVIEW_CURSOR_FIFO,
        .max_wm = PINEVIEW_CURSOR_MAX_WM,
        .default_wm = PINEVIEW_CURSOR_DFT_WM,
        .guard_size = PINEVIEW_CURSOR_GUARD_WM,
        .cacheline_size = PINEVIEW_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_wm_info = {
        .fifo_size = G4X_FIFO_SIZE,
        .max_wm = G4X_MAX_WM,
        .default_wm = G4X_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params g4x_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_wm_info = {
        .fifo_size = VALLEYVIEW_FIFO_SIZE,
        .max_wm = VALLEYVIEW_MAX_WM,
        .default_wm = VALLEYVIEW_MAX_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params valleyview_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = VALLEYVIEW_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = G4X_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i965_cursor_wm_info = {
        .fifo_size = I965_CURSOR_FIFO,
        .max_wm = I965_CURSOR_MAX_WM,
        .default_wm = I965_CURSOR_DFT_WM,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i945_wm_info = {
        .fifo_size = I945_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i915_wm_info = {
        .fifo_size = I915_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I915_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_a_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i830_bc_wm_info = {
        .fifo_size = I855GM_FIFO_SIZE,
        .max_wm = I915_MAX_WM/2,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};
static const struct intel_watermark_params i845_wm_info = {
        .fifo_size = I830_FIFO_SIZE,
        .max_wm = I915_MAX_WM,
        .default_wm = 1,
        .guard_size = 2,
        .cacheline_size = I830_FIFO_LINE_SIZE,
};

/**
 * intel_calculate_wm - calculate watermark level
 * @clock_in_khz: pixel clock
 * @wm: chip FIFO params
 * @fifo_size: size of the display FIFO, in cachelines
 * @pixel_size: display pixel size
 * @latency_ns: memory latency for the platform
 *
 * Calculate the watermark level (the level at which the display plane will
 * start fetching from memory again).  Each chip has a different display
 * FIFO size and allocation, so the caller needs to figure that out and pass
 * in the correct intel_watermark_params structure.
 *
 * As the pixel clock runs, the FIFO will be drained at a rate that depends
 * on the pixel size.  When it reaches the watermark level, it'll start
 * fetching FIFO line sized chunks from memory until the FIFO fills
 * past the watermark point.  If the FIFO drains completely, a FIFO underrun
 * will occur, and a display engine hang could result.
 */
static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
                                        const struct intel_watermark_params *wm,
                                        int fifo_size,
                                        int pixel_size,
                                        unsigned long latency_ns)
{
        long entries_required, wm_size;

        /*
         * Note: we need to make sure we don't overflow for various clock &
         * latency values.
         * clocks go from a few thousand to several hundred thousand.
         * latency is usually a few thousand
         */
        entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
                1000;
        entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);

        DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);

        wm_size = fifo_size - (entries_required + wm->guard_size);

        DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);

        /* Don't promote wm_size to unsigned... */
        if (wm_size > (long)wm->max_wm)
                wm_size = wm->max_wm;
        if (wm_size <= 0)
                wm_size = wm->default_wm;

        /*
         * Bspec seems to indicate that the value shouldn't be lower than
         * 'burst size + 1'. Certainly 830 is quite unhappy with low values.
         * Let's go for 8 which is the burst size since certain platforms
         * already use a hardcoded 8 (which is what the spec says should be
         * done).
         */
        if (wm_size <= 8)
                wm_size = 8;

        return wm_size;
}
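
/*
 * Worked example of the calculation above, with illustrative numbers only
 * (not taken from any particular platform): for a 100 MHz pixel clock
 * (clock_in_khz = 100000), 4 bytes per pixel, latency_ns = 5000 and a
 * 64-byte cacheline,
 *
 *   bytes drained during the latency = (100000 / 1000) * 4 * 5000 / 1000
 *                                    = 2000 bytes
 *   entries_required                 = DIV_ROUND_UP(2000, 64) = 32 lines
 *   wm_size                          = fifo_size - (32 + guard_size)
 *
 * so a hypothetical FIFO of 96 entries with a guard of 2 would yield a
 * watermark of 62, i.e. the plane starts refilling once it has drained
 * down to 62 entries.
 */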

static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
{
        struct drm_crtc *crtc, *enabled = NULL;

        for_each_crtc(dev, crtc) {
                if (intel_crtc_active(crtc)) {
                        if (enabled)
                                return NULL;
                        enabled = crtc;
                }
        }

        return enabled;
}

static void pineview_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct cxsr_latency *latency;
        u32 reg;
        unsigned long wm;

        latency = intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3,
                                         dev_priv->fsb_freq, dev_priv->mem_freq);
        if (!latency) {
                DRM_DEBUG_KMS("Unknown FSB/MEM found, disable CxSR\n");
                intel_set_memory_cxsr(dev_priv, false);
                return;
        }

        crtc = single_enabled_crtc(dev);
        if (crtc) {
                const struct drm_display_mode *adjusted_mode;
                int pixel_size = crtc->primary->fb->bits_per_pixel / 8;
                int clock;

                adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                clock = adjusted_mode->crtc_clock;

                /* Display SR */
                wm = intel_calculate_wm(clock, &pineview_display_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->display_sr);
                reg = I915_READ(DSPFW1);
                reg &= ~DSPFW_SR_MASK;
                reg |= wm << DSPFW_SR_SHIFT;
                I915_WRITE(DSPFW1, reg);
                DRM_DEBUG_KMS("DSPFW1 register is %x\n", reg);

                /* cursor SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_wm,
                                        pineview_display_wm.fifo_size,
                                        pixel_size, latency->cursor_sr);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_CURSOR_SR_MASK;
                reg |= (wm & 0x3f) << DSPFW_CURSOR_SR_SHIFT;
                I915_WRITE(DSPFW3, reg);

                /* Display HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_display_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->display_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_SR_MASK;
                reg |= wm & DSPFW_HPLL_SR_MASK;
                I915_WRITE(DSPFW3, reg);

                /* cursor HPLL off SR */
                wm = intel_calculate_wm(clock, &pineview_cursor_hplloff_wm,
                                        pineview_display_hplloff_wm.fifo_size,
                                        pixel_size, latency->cursor_hpll_disable);
                reg = I915_READ(DSPFW3);
                reg &= ~DSPFW_HPLL_CURSOR_MASK;
                reg |= (wm & 0x3f) << DSPFW_HPLL_CURSOR_SHIFT;
                I915_WRITE(DSPFW3, reg);
                DRM_DEBUG_KMS("DSPFW3 register is %x\n", reg);

                intel_set_memory_cxsr(dev_priv, true);
        } else {
                intel_set_memory_cxsr(dev_priv, false);
        }
}

static bool g4x_compute_wm0(struct drm_device *dev,
                            int plane,
                            const struct intel_watermark_params *display,
                            int display_latency_ns,
                            const struct intel_watermark_params *cursor,
                            int cursor_latency_ns,
                            int *plane_wm,
                            int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int htotal, hdisplay, clock, pixel_size;
        int line_time_us, line_count;
        int entries, tlb_miss;

        crtc = intel_get_crtc_for_plane(dev, plane);
        if (!intel_crtc_active(crtc)) {
                *cursor_wm = cursor->guard_size;
                *plane_wm = display->guard_size;
                return false;
        }

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        pixel_size = crtc->primary->fb->bits_per_pixel / 8;

        /* Use the small buffer method to calculate plane watermark */
        entries = ((clock * pixel_size / 1000) * display_latency_ns) / 1000;
        tlb_miss = display->fifo_size*display->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, display->cacheline_size);
        *plane_wm = entries + display->guard_size;
        if (*plane_wm > (int)display->max_wm)
                *plane_wm = display->max_wm;

        /* Use the large buffer method to calculate cursor watermark */
        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (cursor_latency_ns / line_time_us + 1000) / 1000;
        entries = line_count * to_intel_crtc(crtc)->cursor_width * pixel_size;
        tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8;
        if (tlb_miss > 0)
                entries += tlb_miss;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;
        if (*cursor_wm > (int)cursor->max_wm)
                *cursor_wm = (int)cursor->max_wm;

        return true;
}

/*
 * Check the wm result.
 *
 * If any calculated watermark value is larger than the maximum value that
 * can be programmed into the associated watermark register, that watermark
 * must be disabled.
 */
static bool g4x_check_srwm(struct drm_device *dev,
                           int display_wm, int cursor_wm,
                           const struct intel_watermark_params *display,
                           const struct intel_watermark_params *cursor)
{
        DRM_DEBUG_KMS("SR watermark: display plane %d, cursor %d\n",
                      display_wm, cursor_wm);

        if (display_wm > display->max_wm) {
                DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
                              display_wm, display->max_wm);
                return false;
        }

        if (cursor_wm > cursor->max_wm) {
                DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
                              cursor_wm, cursor->max_wm);
                return false;
        }

        if (!(display_wm || cursor_wm)) {
                DRM_DEBUG_KMS("SR latency is 0, disabling\n");
                return false;
        }

        return true;
}

static bool g4x_compute_srwm(struct drm_device *dev,
                             int plane,
                             int latency_ns,
                             const struct intel_watermark_params *display,
                             const struct intel_watermark_params *cursor,
                             int *display_wm, int *cursor_wm)
{
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        int hdisplay, htotal, pixel_size, clock;
        unsigned long line_time_us;
        int line_count, line_size;
        int small, large;
        int entries;

        if (!latency_ns) {
                *display_wm = *cursor_wm = 0;
                return false;
        }

        crtc = intel_get_crtc_for_plane(dev, plane);
        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        clock = adjusted_mode->crtc_clock;
        htotal = adjusted_mode->crtc_htotal;
        hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
        pixel_size = crtc->primary->fb->bits_per_pixel / 8;

        line_time_us = max(htotal * 1000 / clock, 1);
        line_count = (latency_ns / line_time_us + 1000) / 1000;
        line_size = hdisplay * pixel_size;

        /* Use the minimum of the small and large buffer method for primary */
        small = ((clock * pixel_size / 1000) * latency_ns) / 1000;
        large = line_count * line_size;

        entries = DIV_ROUND_UP(min(small, large), display->cacheline_size);
        *display_wm = entries + display->guard_size;

        /* calculate the self-refresh watermark for display cursor */
        entries = line_count * pixel_size * to_intel_crtc(crtc)->cursor_width;
        entries = DIV_ROUND_UP(entries, cursor->cacheline_size);
        *cursor_wm = entries + cursor->guard_size;

        return g4x_check_srwm(dev,
                              *display_wm, *cursor_wm,
                              display, cursor);
}

static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
                                      int pixel_size,
                                      int *prec_mult,
                                      int *drain_latency)
{
        struct drm_device *dev = crtc->dev;
        int entries;
        int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock;

        if (WARN(clock == 0, "Pixel clock is zero!\n"))
                return false;

        if (WARN(pixel_size == 0, "Pixel size is zero!\n"))
                return false;

        entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
        if (IS_CHERRYVIEW(dev))
                *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 :
                                               DRAIN_LATENCY_PRECISION_16;
        else
                *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
                                               DRAIN_LATENCY_PRECISION_32;
        *drain_latency = (64 * (*prec_mult) * 4) / entries;

        if (*drain_latency > DRAIN_LATENCY_MASK)
                *drain_latency = DRAIN_LATENCY_MASK;

        return true;
}
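
/*
 * Worked example of the drain latency computation above, with illustrative
 * numbers only: for a 100 MHz pixel clock (clock = 100000) and 4 bytes per
 * pixel,
 *
 *   entries       = DIV_ROUND_UP(100000, 1000) * 4 = 400
 *   drain_latency = (64 * prec_mult * 4) / 400
 *
 * Since entries > 128, the larger precision multiplier is selected, giving
 * a drain latency of 40 with the 64x multiplier (VLV) and 20 with the 32x
 * multiplier (CHV), before clamping to DRAIN_LATENCY_MASK.
 */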

/*
 * Update drain latency registers of memory arbiter
 *
 * Valleyview SoC has a new memory arbiter and needs drain latency registers
 * to be programmed. Each plane has a drain latency multiplier and a drain
 * latency value.
 */

static void vlv_update_drain_latency(struct drm_crtc *crtc)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pixel_size;
        int drain_latency;
        enum pipe pipe = intel_crtc->pipe;
        int plane_prec, prec_mult, plane_dl;
        const int high_precision = IS_CHERRYVIEW(dev) ?
                DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;

        plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH |
                   DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH |
                   (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT));

        if (!intel_crtc_active(crtc)) {
                I915_WRITE(VLV_DDL(pipe), plane_dl);
                return;
        }

        /* Primary plane Drain Latency */
        pixel_size = crtc->primary->fb->bits_per_pixel / 8;     /* BPP */
        if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
                plane_prec = (prec_mult == high_precision) ?
                                           DDL_PLANE_PRECISION_HIGH :
                                           DDL_PLANE_PRECISION_LOW;
                plane_dl |= plane_prec | drain_latency;
        }

        /* Cursor Drain Latency
         * BPP is always 4 for cursor
         */
        pixel_size = 4;

        /* Program cursor DL only if it is enabled */
        if (intel_crtc->cursor_base &&
            vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
                plane_prec = (prec_mult == high_precision) ?
                                           DDL_CURSOR_PRECISION_HIGH :
                                           DDL_CURSOR_PRECISION_LOW;
                plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT);
        }

        I915_WRITE(VLV_DDL(pipe), plane_dl);
}

#define single_plane_enabled(mask) is_power_of_2(mask)

static void valleyview_update_wm(struct drm_crtc *crtc)
{
        struct drm_device *dev = crtc->dev;
        static const int sr_latency_ns = 12000;
        struct drm_i915_private *dev_priv = dev->dev_private;
        int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
        int plane_sr, cursor_sr;
        int ignore_plane_sr, ignore_cursor_sr;
        unsigned int enabled = 0;
        bool cxsr_enabled;

        vlv_update_drain_latency(crtc);

        if (g4x_compute_wm0(dev, PIPE_A,
                            &valleyview_wm_info, pessimal_latency_ns,
                            &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planea_wm, &cursora_wm))
                enabled |= 1 << PIPE_A;

        if (g4x_compute_wm0(dev, PIPE_B,
                            &valleyview_wm_info, pessimal_latency_ns,
                            &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planeb_wm, &cursorb_wm))
                enabled |= 1 << PIPE_B;

        if (single_plane_enabled(enabled) &&
            g4x_compute_srwm(dev, ffs(enabled) - 1,
                             sr_latency_ns,
                             &valleyview_wm_info,
                             &valleyview_cursor_wm_info,
                             &plane_sr, &ignore_cursor_sr) &&
            g4x_compute_srwm(dev, ffs(enabled) - 1,
                             2*sr_latency_ns,
                             &valleyview_wm_info,
                             &valleyview_cursor_wm_info,
                             &ignore_plane_sr, &cursor_sr)) {
                cxsr_enabled = true;
        } else {
                cxsr_enabled = false;
                intel_set_memory_cxsr(dev_priv, false);
                plane_sr = cursor_sr = 0;
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
                      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
                      planea_wm, cursora_wm,
                      planeb_wm, cursorb_wm,
                      plane_sr, cursor_sr);

        I915_WRITE(DSPFW1,
                   (plane_sr << DSPFW_SR_SHIFT) |
                   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
                   (planea_wm << DSPFW_PLANEA_SHIFT));
        I915_WRITE(DSPFW2,
                   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
        I915_WRITE(DSPFW3,
                   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
                   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));

        if (cxsr_enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

static void cherryview_update_wm(struct drm_crtc *crtc)
{
        struct drm_device *dev = crtc->dev;
        static const int sr_latency_ns = 12000;
        struct drm_i915_private *dev_priv = dev->dev_private;
        int planea_wm, planeb_wm, planec_wm;
        int cursora_wm, cursorb_wm, cursorc_wm;
        int plane_sr, cursor_sr;
        int ignore_plane_sr, ignore_cursor_sr;
        unsigned int enabled = 0;
        bool cxsr_enabled;

        vlv_update_drain_latency(crtc);

        if (g4x_compute_wm0(dev, PIPE_A,
                            &valleyview_wm_info, pessimal_latency_ns,
                            &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planea_wm, &cursora_wm))
                enabled |= 1 << PIPE_A;

        if (g4x_compute_wm0(dev, PIPE_B,
                            &valleyview_wm_info, pessimal_latency_ns,
                            &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planeb_wm, &cursorb_wm))
                enabled |= 1 << PIPE_B;

        if (g4x_compute_wm0(dev, PIPE_C,
                            &valleyview_wm_info, pessimal_latency_ns,
                            &valleyview_cursor_wm_info, pessimal_latency_ns,
                            &planec_wm, &cursorc_wm))
                enabled |= 1 << PIPE_C;

        if (single_plane_enabled(enabled) &&
            g4x_compute_srwm(dev, ffs(enabled) - 1,
                             sr_latency_ns,
                             &valleyview_wm_info,
                             &valleyview_cursor_wm_info,
                             &plane_sr, &ignore_cursor_sr) &&
            g4x_compute_srwm(dev, ffs(enabled) - 1,
                             2*sr_latency_ns,
                             &valleyview_wm_info,
                             &valleyview_cursor_wm_info,
                             &ignore_plane_sr, &cursor_sr)) {
                cxsr_enabled = true;
        } else {
                cxsr_enabled = false;
                intel_set_memory_cxsr(dev_priv, false);
                plane_sr = cursor_sr = 0;
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
                      "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, "
                      "SR: plane=%d, cursor=%d\n",
                      planea_wm, cursora_wm,
                      planeb_wm, cursorb_wm,
                      planec_wm, cursorc_wm,
                      plane_sr, cursor_sr);

        I915_WRITE(DSPFW1,
                   (plane_sr << DSPFW_SR_SHIFT) |
                   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
                   (planea_wm << DSPFW_PLANEA_SHIFT));
        I915_WRITE(DSPFW2,
                   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
        I915_WRITE(DSPFW3,
                   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
                   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
        I915_WRITE(DSPFW9_CHV,
                   (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK |
                                              DSPFW_CURSORC_MASK)) |
                   (planec_wm << DSPFW_PLANEC_SHIFT) |
                   (cursorc_wm << DSPFW_CURSORC_SHIFT));

        if (cxsr_enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

static void valleyview_update_sprite_wm(struct drm_plane *plane,
                                        struct drm_crtc *crtc,
                                        uint32_t sprite_width,
                                        uint32_t sprite_height,
                                        int pixel_size,
                                        bool enabled, bool scaled)
{
        struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        int pipe = to_intel_plane(plane)->pipe;
        int sprite = to_intel_plane(plane)->plane;
        int drain_latency;
        int plane_prec;
        int sprite_dl;
        int prec_mult;
        const int high_precision = IS_CHERRYVIEW(dev) ?
                DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;

        sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) |
                    (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite)));

        if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult,
                                                 &drain_latency)) {
                plane_prec = (prec_mult == high_precision) ?
                                           DDL_SPRITE_PRECISION_HIGH(sprite) :
                                           DDL_SPRITE_PRECISION_LOW(sprite);
                sprite_dl |= plane_prec |
                             (drain_latency << DDL_SPRITE_SHIFT(sprite));
        }

        I915_WRITE(VLV_DDL(pipe), sprite_dl);
}

static void g4x_update_wm(struct drm_crtc *crtc)
{
        struct drm_device *dev = crtc->dev;
        static const int sr_latency_ns = 12000;
        struct drm_i915_private *dev_priv = dev->dev_private;
        int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
        int plane_sr, cursor_sr;
        unsigned int enabled = 0;
        bool cxsr_enabled;

        if (g4x_compute_wm0(dev, PIPE_A,
                            &g4x_wm_info, pessimal_latency_ns,
                            &g4x_cursor_wm_info, pessimal_latency_ns,
                            &planea_wm, &cursora_wm))
                enabled |= 1 << PIPE_A;

        if (g4x_compute_wm0(dev, PIPE_B,
                            &g4x_wm_info, pessimal_latency_ns,
                            &g4x_cursor_wm_info, pessimal_latency_ns,
                            &planeb_wm, &cursorb_wm))
                enabled |= 1 << PIPE_B;

        if (single_plane_enabled(enabled) &&
            g4x_compute_srwm(dev, ffs(enabled) - 1,
                             sr_latency_ns,
                             &g4x_wm_info,
                             &g4x_cursor_wm_info,
                             &plane_sr, &cursor_sr)) {
                cxsr_enabled = true;
        } else {
                cxsr_enabled = false;
                intel_set_memory_cxsr(dev_priv, false);
                plane_sr = cursor_sr = 0;
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
                      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
                      planea_wm, cursora_wm,
                      planeb_wm, cursorb_wm,
                      plane_sr, cursor_sr);

        I915_WRITE(DSPFW1,
                   (plane_sr << DSPFW_SR_SHIFT) |
                   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
                   (planeb_wm << DSPFW_PLANEB_SHIFT) |
                   (planea_wm << DSPFW_PLANEA_SHIFT));
        I915_WRITE(DSPFW2,
                   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
                   (cursora_wm << DSPFW_CURSORA_SHIFT));
        /* HPLL off in SR has some issues on G4x... disable it */
        I915_WRITE(DSPFW3,
                   (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) |
                   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));

        if (cxsr_enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

static void i965_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        int srwm = 1;
        int cursor_sr = 16;
        bool cxsr_enabled;

        /* Calc sr entries for one plane configs */
        crtc = single_enabled_crtc(dev);
        if (crtc) {
                /* self-refresh has much higher latency */
                static const int sr_latency_ns = 12000;
                const struct drm_display_mode *adjusted_mode =
                        &to_intel_crtc(crtc)->config->base.adjusted_mode;
                int clock = adjusted_mode->crtc_clock;
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w;
                int pixel_size = crtc->primary->fb->bits_per_pixel / 8;
                unsigned long line_time_us;
                int entries;

                line_time_us = max(htotal * 1000 / clock, 1);

                /* Use ns/us then divide to preserve precision */
                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
                        pixel_size * hdisplay;
                entries = DIV_ROUND_UP(entries, I915_FIFO_LINE_SIZE);
                srwm = I965_FIFO_SIZE - entries;
                if (srwm < 0)
                        srwm = 1;
                srwm &= 0x1ff;
                DRM_DEBUG_KMS("self-refresh entries: %d, wm: %d\n",
                              entries, srwm);

                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
                        pixel_size * to_intel_crtc(crtc)->cursor_width;
                entries = DIV_ROUND_UP(entries,
                                          i965_cursor_wm_info.cacheline_size);
                cursor_sr = i965_cursor_wm_info.fifo_size -
                        (entries + i965_cursor_wm_info.guard_size);

                if (cursor_sr > i965_cursor_wm_info.max_wm)
                        cursor_sr = i965_cursor_wm_info.max_wm;

                DRM_DEBUG_KMS("self-refresh watermark: display plane %d "
                              "cursor %d\n", srwm, cursor_sr);

                cxsr_enabled = true;
        } else {
                cxsr_enabled = false;
                /* Turn off self refresh if both pipes are enabled */
                intel_set_memory_cxsr(dev_priv, false);
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: 8, B: 8, C: 8, SR %d\n",
                      srwm);

        /* 965 has limitations... */
        I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
                   (8 << DSPFW_CURSORB_SHIFT) |
                   (8 << DSPFW_PLANEB_SHIFT) |
                   (8 << DSPFW_PLANEA_SHIFT));
        I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) |
                   (8 << DSPFW_PLANEC_SHIFT_OLD));
        /* update cursor SR watermark */
        I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));

        if (cxsr_enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

static void i9xx_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        const struct intel_watermark_params *wm_info;
        uint32_t fwater_lo;
        uint32_t fwater_hi;
        int cwm, srwm = 1;
        int fifo_size;
        int planea_wm, planeb_wm;
        struct drm_crtc *crtc, *enabled = NULL;

        if (IS_I945GM(dev))
                wm_info = &i945_wm_info;
        else if (!IS_GEN2(dev))
                wm_info = &i915_wm_info;
        else
                wm_info = &i830_a_wm_info;

        fifo_size = dev_priv->display.get_fifo_size(dev, 0);
        crtc = intel_get_crtc_for_plane(dev, 0);
        if (intel_crtc_active(crtc)) {
                const struct drm_display_mode *adjusted_mode;
                int cpp = crtc->primary->fb->bits_per_pixel / 8;
                if (IS_GEN2(dev))
                        cpp = 4;

                adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                               wm_info, fifo_size, cpp,
                                               pessimal_latency_ns);
                enabled = crtc;
        } else {
                planea_wm = fifo_size - wm_info->guard_size;
                if (planea_wm > (long)wm_info->max_wm)
                        planea_wm = wm_info->max_wm;
        }

        if (IS_GEN2(dev))
                wm_info = &i830_bc_wm_info;

        fifo_size = dev_priv->display.get_fifo_size(dev, 1);
        crtc = intel_get_crtc_for_plane(dev, 1);
        if (intel_crtc_active(crtc)) {
                const struct drm_display_mode *adjusted_mode;
                int cpp = crtc->primary->fb->bits_per_pixel / 8;
                if (IS_GEN2(dev))
                        cpp = 4;

                adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
                planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                               wm_info, fifo_size, cpp,
                                               pessimal_latency_ns);
                if (enabled == NULL)
                        enabled = crtc;
                else
                        enabled = NULL;
        } else {
                planeb_wm = fifo_size - wm_info->guard_size;
                if (planeb_wm > (long)wm_info->max_wm)
                        planeb_wm = wm_info->max_wm;
        }

        DRM_DEBUG_KMS("FIFO watermarks - A: %d, B: %d\n", planea_wm, planeb_wm);

        if (IS_I915GM(dev) && enabled) {
                struct drm_i915_gem_object *obj;

                obj = intel_fb_obj(enabled->primary->fb);

                /* self-refresh seems busted with untiled */
                if (obj->tiling_mode == I915_TILING_NONE)
                        enabled = NULL;
        }

        /*
         * Overlay gets an aggressive default since video jitter is bad.
         */
        cwm = 2;

        /* Play safe and disable self-refresh before adjusting watermarks. */
        intel_set_memory_cxsr(dev_priv, false);

        /* Calc sr entries for one plane configs */
        if (HAS_FW_BLC(dev) && enabled) {
                /* self-refresh has much higher latency */
                static const int sr_latency_ns = 6000;
                const struct drm_display_mode *adjusted_mode =
                        &to_intel_crtc(enabled)->config->base.adjusted_mode;
                int clock = adjusted_mode->crtc_clock;
                int htotal = adjusted_mode->crtc_htotal;
                int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w;
                int pixel_size = enabled->primary->fb->bits_per_pixel / 8;
                unsigned long line_time_us;
                int entries;

                line_time_us = max(htotal * 1000 / clock, 1);

                /* Use ns/us then divide to preserve precision */
                entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) *
                        pixel_size * hdisplay;
                entries = DIV_ROUND_UP(entries, wm_info->cacheline_size);
                DRM_DEBUG_KMS("self-refresh entries: %d\n", entries);
                srwm = wm_info->fifo_size - entries;
                if (srwm < 0)
                        srwm = 1;

                if (IS_I945G(dev) || IS_I945GM(dev))
                        I915_WRITE(FW_BLC_SELF,
                                   FW_BLC_SELF_FIFO_MASK | (srwm & 0xff));
                else if (IS_I915GM(dev))
                        I915_WRITE(FW_BLC_SELF, srwm & 0x3f);
        }

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d, B: %d, C: %d, SR %d\n",
                      planea_wm, planeb_wm, cwm, srwm);

        fwater_lo = ((planeb_wm & 0x3f) << 16) | (planea_wm & 0x3f);
        fwater_hi = (cwm & 0x1f);

        /* Set request length to 8 cachelines per fetch */
        fwater_lo = fwater_lo | (1 << 24) | (1 << 8);
        fwater_hi = fwater_hi | (1 << 8);

        I915_WRITE(FW_BLC, fwater_lo);
        I915_WRITE(FW_BLC2, fwater_hi);

        if (enabled)
                intel_set_memory_cxsr(dev_priv, true);
}

static void i845_update_wm(struct drm_crtc *unused_crtc)
{
        struct drm_device *dev = unused_crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        const struct drm_display_mode *adjusted_mode;
        uint32_t fwater_lo;
        int planea_wm;

        crtc = single_enabled_crtc(dev);
        if (crtc == NULL)
                return;

        adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode;
        planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock,
                                       &i845_wm_info,
                                       dev_priv->display.get_fifo_size(dev, 0),
                                       4, pessimal_latency_ns);
        fwater_lo = I915_READ(FW_BLC) & ~0xfff;
        fwater_lo |= (3<<8) | planea_wm;

        DRM_DEBUG_KMS("Setting FIFO watermarks - A: %d\n", planea_wm);

        I915_WRITE(FW_BLC, fwater_lo);
}

static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
                                    struct drm_crtc *crtc)
{
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        uint32_t pixel_rate;

        pixel_rate = intel_crtc->config->base.adjusted_mode.crtc_clock;

        /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to
         * adjust the pixel_rate here. */

        if (intel_crtc->config->pch_pfit.enabled) {
                uint64_t pipe_w, pipe_h, pfit_w, pfit_h;
                uint32_t pfit_size = intel_crtc->config->pch_pfit.size;

                pipe_w = intel_crtc->config->pipe_src_w;
                pipe_h = intel_crtc->config->pipe_src_h;
                pfit_w = (pfit_size >> 16) & 0xFFFF;
                pfit_h = pfit_size & 0xFFFF;
                if (pipe_w < pfit_w)
                        pipe_w = pfit_w;
                if (pipe_h < pfit_h)
                        pipe_h = pfit_h;

                pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h,
                                     pfit_w * pfit_h);
        }

        return pixel_rate;
}
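
/*
 * Illustrative example of the panel fitter adjustment above (numbers are
 * hypothetical): with a 3840x2160 pipe source scaled down by the PCH panel
 * fitter to a 1920x1080 window, the pixel rate used for watermarks becomes
 *
 *   pixel_rate * (3840 * 2160) / (1920 * 1080) = pixel_rate * 4
 *
 * reflecting that the pipe has to fetch four source pixels for every pixel
 * it sends out.
 */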
1306
1307 /* latency must be in 0.1us units. */
1308 static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
1309                                uint32_t latency)
1310 {
1311         uint64_t ret;
1312
1313         if (WARN(latency == 0, "Latency value missing\n"))
1314                 return UINT_MAX;
1315
1316         ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
1317         ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
1318
1319         return ret;
1320 }
1321
1322 /* latency must be in 0.1us units. */
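/*
 * Method 2 counts whole display lines elapsing during the latency instead.
 * With the same hypothetical numbers (148500 kHz, latency == 7) and
 * htotal == 2200, 7 * 148500 / (2200 * 10000) truncates to 0 full lines,
 * so one extra line of 1920 * 4 bytes is charged for a 1920 pixel wide
 * plane: DIV_ROUND_UP(7680, 64) + 2 = 122.
 */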
1323 static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
1324                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
1325                                uint32_t latency)
1326 {
1327         uint32_t ret;
1328
1329         if (WARN(latency == 0, "Latency value missing\n"))
1330                 return UINT_MAX;
1331
1332         ret = (latency * pixel_rate) / (pipe_htotal * 10000);
1333         ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
1334         ret = DIV_ROUND_UP(ret, 64) + 2;
1335         return ret;
1336 }
1337
1338 static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
1339                            uint8_t bytes_per_pixel)
1340 {
1341         return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
1342 }
1343
1344 struct skl_pipe_wm_parameters {
1345         bool active;
1346         uint32_t pipe_htotal;
1347         uint32_t pixel_rate; /* in KHz */
1348         struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
1349         struct intel_plane_wm_parameters cursor;
1350 };
1351
1352 struct ilk_pipe_wm_parameters {
1353         bool active;
1354         uint32_t pipe_htotal;
1355         uint32_t pixel_rate;
1356         struct intel_plane_wm_parameters pri;
1357         struct intel_plane_wm_parameters spr;
1358         struct intel_plane_wm_parameters cur;
1359 };
1360
1361 struct ilk_wm_maximums {
1362         uint16_t pri;
1363         uint16_t spr;
1364         uint16_t cur;
1365         uint16_t fbc;
1366 };
1367
1368 /* used in computing the new watermarks state */
1369 struct intel_wm_config {
1370         unsigned int num_pipes_active;
1371         bool sprites_enabled;
1372         bool sprites_scaled;
1373 };
1374
1375 /*
1376  * For both WM_PIPE and WM_LP.
1377  * mem_value must be in 0.1us units.
1378  */
1379 static uint32_t ilk_compute_pri_wm(const struct ilk_pipe_wm_parameters *params,
1380                                    uint32_t mem_value,
1381                                    bool is_lp)
1382 {
1383         uint32_t method1, method2;
1384
1385         if (!params->active || !params->pri.enabled)
1386                 return 0;
1387
1388         method1 = ilk_wm_method1(params->pixel_rate,
1389                                  params->pri.bytes_per_pixel,
1390                                  mem_value);
1391
1392         if (!is_lp)
1393                 return method1;
1394
1395         method2 = ilk_wm_method2(params->pixel_rate,
1396                                  params->pipe_htotal,
1397                                  params->pri.horiz_pixels,
1398                                  params->pri.bytes_per_pixel,
1399                                  mem_value);
1400
1401         return min(method1, method2);
1402 }
1403
1404 /*
1405  * For both WM_PIPE and WM_LP.
1406  * mem_value must be in 0.1us units.
1407  */
1408 static uint32_t ilk_compute_spr_wm(const struct ilk_pipe_wm_parameters *params,
1409                                    uint32_t mem_value)
1410 {
1411         uint32_t method1, method2;
1412
1413         if (!params->active || !params->spr.enabled)
1414                 return 0;
1415
1416         method1 = ilk_wm_method1(params->pixel_rate,
1417                                  params->spr.bytes_per_pixel,
1418                                  mem_value);
1419         method2 = ilk_wm_method2(params->pixel_rate,
1420                                  params->pipe_htotal,
1421                                  params->spr.horiz_pixels,
1422                                  params->spr.bytes_per_pixel,
1423                                  mem_value);
1424         return min(method1, method2);
1425 }
1426
1427 /*
1428  * For both WM_PIPE and WM_LP.
1429  * mem_value must be in 0.1us units.
1430  */
1431 static uint32_t ilk_compute_cur_wm(const struct ilk_pipe_wm_parameters *params,
1432                                    uint32_t mem_value)
1433 {
1434         if (!params->active || !params->cur.enabled)
1435                 return 0;
1436
1437         return ilk_wm_method2(params->pixel_rate,
1438                               params->pipe_htotal,
1439                               params->cur.horiz_pixels,
1440                               params->cur.bytes_per_pixel,
1441                               mem_value);
1442 }
1443
1444 /* Only for WM_LP. */
1445 static uint32_t ilk_compute_fbc_wm(const struct ilk_pipe_wm_parameters *params,
1446                                    uint32_t pri_val)
1447 {
1448         if (!params->active || !params->pri.enabled)
1449                 return 0;
1450
1451         return ilk_wm_fbc(pri_val,
1452                           params->pri.horiz_pixels,
1453                           params->pri.bytes_per_pixel);
1454 }
1455
1456 static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
1457 {
1458         if (INTEL_INFO(dev)->gen >= 8)
1459                 return 3072;
1460         else if (INTEL_INFO(dev)->gen >= 7)
1461                 return 768;
1462         else
1463                 return 512;
1464 }
1465
1466 static unsigned int ilk_plane_wm_reg_max(const struct drm_device *dev,
1467                                          int level, bool is_sprite)
1468 {
1469         if (INTEL_INFO(dev)->gen >= 8)
1470                 /* BDW primary/sprite plane watermarks */
1471                 return level == 0 ? 255 : 2047;
1472         else if (INTEL_INFO(dev)->gen >= 7)
1473                 /* IVB/HSW primary/sprite plane watermarks */
1474                 return level == 0 ? 127 : 1023;
1475         else if (!is_sprite)
1476                 /* ILK/SNB primary plane watermarks */
1477                 return level == 0 ? 127 : 511;
1478         else
1479                 /* ILK/SNB sprite plane watermarks */
1480                 return level == 0 ? 63 : 255;
1481 }
1482
1483 static unsigned int ilk_cursor_wm_reg_max(const struct drm_device *dev,
1484                                           int level)
1485 {
1486         if (INTEL_INFO(dev)->gen >= 7)
1487                 return level == 0 ? 63 : 255;
1488         else
1489                 return level == 0 ? 31 : 63;
1490 }
1491
1492 static unsigned int ilk_fbc_wm_reg_max(const struct drm_device *dev)
1493 {
1494         if (INTEL_INFO(dev)->gen >= 8)
1495                 return 31;
1496         else
1497                 return 15;
1498 }
1499
1500 /* Calculate the maximum primary/sprite plane watermark */
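/*
 * Hypothetical IVB example (gen 7, 768 block FIFO, 3 pipes): at level 0
 * each pipe gets 768 / 3 = 256 blocks, split 1:1 with an enabled sprite
 * into 128 + 128. For an LP level with a single active pipe the whole
 * 768 blocks are available; a 5:6 DDB partitioning then hands
 * 768 * 5 / 6 = 640 blocks to the sprite and 768 / 6 = 128 to the
 * primary, before clamping to the register limits below.
 */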
1501 static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
1502                                      int level,
1503                                      const struct intel_wm_config *config,
1504                                      enum intel_ddb_partitioning ddb_partitioning,
1505                                      bool is_sprite)
1506 {
1507         unsigned int fifo_size = ilk_display_fifo_size(dev);
1508
1509         /* if sprites aren't enabled, sprites get nothing */
1510         if (is_sprite && !config->sprites_enabled)
1511                 return 0;
1512
1513         /* HSW allows LP1+ watermarks even with multiple pipes */
1514         if (level == 0 || config->num_pipes_active > 1) {
1515                 fifo_size /= INTEL_INFO(dev)->num_pipes;
1516
1517                 /*
1518                  * For some reason the non self refresh
1519                  * FIFO size is only half of the self
1520                  * refresh FIFO size on ILK/SNB.
1521                  */
1522                 if (INTEL_INFO(dev)->gen <= 6)
1523                         fifo_size /= 2;
1524         }
1525
1526         if (config->sprites_enabled) {
1527                 /* level 0 is always calculated with 1:1 split */
1528                 if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
1529                         if (is_sprite)
1530                                 fifo_size *= 5;
1531                         fifo_size /= 6;
1532                 } else {
1533                         fifo_size /= 2;
1534                 }
1535         }
1536
1537         /* clamp to max that the registers can hold */
1538         return min(fifo_size, ilk_plane_wm_reg_max(dev, level, is_sprite));
1539 }
1540
1541 /* Calculate the maximum cursor plane watermark */
1542 static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
1543                                       int level,
1544                                       const struct intel_wm_config *config)
1545 {
1546         /* HSW LP1+ watermarks w/ multiple pipes */
1547         if (level > 0 && config->num_pipes_active > 1)
1548                 return 64;
1549
1550         /* otherwise just report max that registers can hold */
1551         return ilk_cursor_wm_reg_max(dev, level);
1552 }
1553
1554 static void ilk_compute_wm_maximums(const struct drm_device *dev,
1555                                     int level,
1556                                     const struct intel_wm_config *config,
1557                                     enum intel_ddb_partitioning ddb_partitioning,
1558                                     struct ilk_wm_maximums *max)
1559 {
1560         max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
1561         max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
1562         max->cur = ilk_cursor_wm_max(dev, level, config);
1563         max->fbc = ilk_fbc_wm_reg_max(dev);
1564 }
1565
1566 static void ilk_compute_wm_reg_maximums(struct drm_device *dev,
1567                                         int level,
1568                                         struct ilk_wm_maximums *max)
1569 {
1570         max->pri = ilk_plane_wm_reg_max(dev, level, false);
1571         max->spr = ilk_plane_wm_reg_max(dev, level, true);
1572         max->cur = ilk_cursor_wm_reg_max(dev, level);
1573         max->fbc = ilk_fbc_wm_reg_max(dev);
1574 }
1575
1576 static bool ilk_validate_wm_level(int level,
1577                                   const struct ilk_wm_maximums *max,
1578                                   struct intel_wm_level *result)
1579 {
1580         bool ret;
1581
1582         /* already determined to be invalid? */
1583         if (!result->enable)
1584                 return false;
1585
1586         result->enable = result->pri_val <= max->pri &&
1587                          result->spr_val <= max->spr &&
1588                          result->cur_val <= max->cur;
1589
1590         ret = result->enable;
1591
1592         /*
1593          * HACK until we can pre-compute everything,
1594          * and thus fail gracefully if LP0 watermarks
1595          * are exceeded...
1596          */
1597         if (level == 0 && !result->enable) {
1598                 if (result->pri_val > max->pri)
1599                         DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
1600                                       level, result->pri_val, max->pri);
1601                 if (result->spr_val > max->spr)
1602                         DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
1603                                       level, result->spr_val, max->spr);
1604                 if (result->cur_val > max->cur)
1605                         DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
1606                                       level, result->cur_val, max->cur);
1607
1608                 result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
1609                 result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
1610                 result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
1611                 result->enable = true;
1612         }
1613
1614         return ret;
1615 }
1616
1617 static void ilk_compute_wm_level(const struct drm_i915_private *dev_priv,
1618                                  int level,
1619                                  const struct ilk_pipe_wm_parameters *p,
1620                                  struct intel_wm_level *result)
1621 {
1622         uint16_t pri_latency = dev_priv->wm.pri_latency[level];
1623         uint16_t spr_latency = dev_priv->wm.spr_latency[level];
1624         uint16_t cur_latency = dev_priv->wm.cur_latency[level];
1625
1626         /* WM1+ latency values stored in 0.5us units */
1627         if (level > 0) {
1628                 pri_latency *= 5;
1629                 spr_latency *= 5;
1630                 cur_latency *= 5;
1631         }
1632
1633         result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
1634         result->spr_val = ilk_compute_spr_wm(p, spr_latency);
1635         result->cur_val = ilk_compute_cur_wm(p, cur_latency);
1636         result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
1637         result->enable = true;
1638 }
1639
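/*
 * The value computed here corresponds to the time needed to scan out one
 * line, expressed in 1/8 us units (hence the factor of 8). As a purely
 * illustrative example, htotal == 2200 at 148500 kHz gives
 * 2200 * 1000 * 8 / 148500 ~= 119, i.e. roughly 14.8us per line. The IPS
 * linetime uses the CD clock instead of the pixel clock.
 */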
1640 static uint32_t
1641 hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
1642 {
1643         struct drm_i915_private *dev_priv = dev->dev_private;
1644         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1645         struct drm_display_mode *mode = &intel_crtc->config->base.adjusted_mode;
1646         u32 linetime, ips_linetime;
1647
1648         if (!intel_crtc_active(crtc))
1649                 return 0;
1650
1651         /* The WM is computed based on how long it takes to fill a single
1652          * row at the given clock rate, multiplied by 8.
1653          */
1654         linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
1655                                      mode->crtc_clock);
1656         ips_linetime = DIV_ROUND_CLOSEST(mode->crtc_htotal * 1000 * 8,
1657                                          intel_ddi_get_cdclk_freq(dev_priv));
1658
1659         return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) |
1660                PIPE_WM_LINETIME_TIME(linetime);
1661 }
1662
1663 static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
1664 {
1665         struct drm_i915_private *dev_priv = dev->dev_private;
1666
1667         if (IS_GEN9(dev)) {
1668                 uint32_t val;
1669                 int ret, i;
1670                 int level, max_level = ilk_wm_max_level(dev);
1671
1672                 /* read the first set of memory latencies[0:3] */
1673                 val = 0; /* data0 to be programmed to 0 for first set */
1674                 mutex_lock(&dev_priv->rps.hw_lock);
1675                 ret = sandybridge_pcode_read(dev_priv,
1676                                              GEN9_PCODE_READ_MEM_LATENCY,
1677                                              &val);
1678                 mutex_unlock(&dev_priv->rps.hw_lock);
1679
1680                 if (ret) {
1681                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
1682                         return;
1683                 }
1684
1685                 wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
1686                 wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
1687                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1688                 wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
1689                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1690                 wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
1691                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1692
1693                 /* read the second set of memory latencies[4:7] */
1694                 val = 1; /* data0 to be programmed to 1 for second set */
1695                 mutex_lock(&dev_priv->rps.hw_lock);
1696                 ret = sandybridge_pcode_read(dev_priv,
1697                                              GEN9_PCODE_READ_MEM_LATENCY,
1698                                              &val);
1699                 mutex_unlock(&dev_priv->rps.hw_lock);
1700                 if (ret) {
1701                         DRM_ERROR("SKL Mailbox read error = %d\n", ret);
1702                         return;
1703                 }
1704
1705                 wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
1706                 wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
1707                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1708                 wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
1709                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1710                 wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
1711                                 GEN9_MEM_LATENCY_LEVEL_MASK;
1712
1713                 /*
1714                  * punit doesn't take into account the read latency so we need
1715                  * to add 2us to the various latency levels we retrieve from
1716                  * the punit.
1717                  *   - WM0 is a bit special in that it's the only level that
1718                  *   can't be disabled if we want to have display working, so
1719                  *   we always add 2us there.
1720                  *   - For levels >=1, punit returns 0us latency when they are
1721                  *   disabled, so we respect that and don't add 2us then.
1722                  *
1723                  * Additionally, if a level n (n > 1) has a 0us latency, all
1724                  * levels m (m >= n) need to be disabled. We make sure to
1725                  * sanitize the values out of the punit to satisfy this
1726                  * requirement.
1727                  */
1728                 wm[0] += 2;
1729                 for (level = 1; level <= max_level; level++)
1730                         if (wm[level] != 0)
1731                                 wm[level] += 2;
1732                         else {
1733                                 for (i = level + 1; i <= max_level; i++)
1734                                         wm[i] = 0;
1735
1736                                 break;
1737                         }
1738         } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
1739                 uint64_t sskpd = I915_READ64(MCH_SSKPD);
1740
1741                 wm[0] = (sskpd >> 56) & 0xFF;
1742                 if (wm[0] == 0)
1743                         wm[0] = sskpd & 0xF;
1744                 wm[1] = (sskpd >> 4) & 0xFF;
1745                 wm[2] = (sskpd >> 12) & 0xFF;
1746                 wm[3] = (sskpd >> 20) & 0x1FF;
1747                 wm[4] = (sskpd >> 32) & 0x1FF;
1748         } else if (INTEL_INFO(dev)->gen >= 6) {
1749                 uint32_t sskpd = I915_READ(MCH_SSKPD);
1750
1751                 wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
1752                 wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
1753                 wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
1754                 wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
1755         } else if (INTEL_INFO(dev)->gen >= 5) {
1756                 uint32_t mltr = I915_READ(MLTR_ILK);
1757
1758                 /* ILK primary LP0 latency is 700 ns */
1759                 wm[0] = 7;
1760                 wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
1761                 wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
1762         }
1763 }
1764
1765 static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
1766 {
1767         /* ILK sprite LP0 latency is 1300 ns */
1768         if (INTEL_INFO(dev)->gen == 5)
1769                 wm[0] = 13;
1770 }
1771
1772 static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
1773 {
1774         /* ILK cursor LP0 latency is 1300 ns */
1775         if (INTEL_INFO(dev)->gen == 5)
1776                 wm[0] = 13;
1777
1778         /* WaDoubleCursorLP3Latency:ivb */
1779         if (IS_IVYBRIDGE(dev))
1780                 wm[3] *= 2;
1781 }
1782
1783 int ilk_wm_max_level(const struct drm_device *dev)
1784 {
1785         /* how many WM levels are we expecting */
1786         if (IS_GEN9(dev))
1787                 return 7;
1788         else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
1789                 return 4;
1790         else if (INTEL_INFO(dev)->gen >= 6)
1791                 return 3;
1792         else
1793                 return 2;
1794 }
1795
1796 static void intel_print_wm_latency(struct drm_device *dev,
1797                                    const char *name,
1798                                    const uint16_t wm[8])
1799 {
1800         int level, max_level = ilk_wm_max_level(dev);
1801
1802         for (level = 0; level <= max_level; level++) {
1803                 unsigned int latency = wm[level];
1804
1805                 if (latency == 0) {
1806                         DRM_ERROR("%s WM%d latency not provided\n",
1807                                   name, level);
1808                         continue;
1809                 }
1810
1811                 /*
1812                  * - latencies are in us on gen9.
1813                  * - before then, WM1+ latency values are in 0.5us units
1814                  */
1815                 if (IS_GEN9(dev))
1816                         latency *= 10;
1817                 else if (level > 0)
1818                         latency *= 5;
1819
1820                 DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
1821                               name, level, wm[level],
1822                               latency / 10, latency % 10);
1823         }
1824 }
1825
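/*
 * Note that wm[0] is stored in 0.1us steps while wm[1..] are stored in
 * 0.5us steps, so raising the floor to min == 12 (as the SNB quirk below
 * does) means at least 1.2us for WM0 and DIV_ROUND_UP(12, 5) == 3,
 * i.e. 1.5us, for the higher levels.
 */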
1826 static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv,
1827                                     uint16_t wm[5], uint16_t min)
1828 {
1829         int level, max_level = ilk_wm_max_level(dev_priv->dev);
1830
1831         if (wm[0] >= min)
1832                 return false;
1833
1834         wm[0] = max(wm[0], min);
1835         for (level = 1; level <= max_level; level++)
1836                 wm[level] = max_t(uint16_t, wm[level], DIV_ROUND_UP(min, 5));
1837
1838         return true;
1839 }
1840
1841 static void snb_wm_latency_quirk(struct drm_device *dev)
1842 {
1843         struct drm_i915_private *dev_priv = dev->dev_private;
1844         bool changed;
1845
1846         /*
1847          * The BIOS provided WM memory latency values are often
1848          * inadequate for high resolution displays. Adjust them.
1849          */
1850         changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) |
1851                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) |
1852                 ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12);
1853
1854         if (!changed)
1855                 return;
1856
1857         DRM_DEBUG_KMS("WM latency values increased to avoid potential underruns\n");
1858         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
1859         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
1860         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
1861 }
1862
1863 static void ilk_setup_wm_latency(struct drm_device *dev)
1864 {
1865         struct drm_i915_private *dev_priv = dev->dev_private;
1866
1867         intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
1868
1869         memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
1870                sizeof(dev_priv->wm.pri_latency));
1871         memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
1872                sizeof(dev_priv->wm.pri_latency));
1873
1874         intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
1875         intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
1876
1877         intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
1878         intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
1879         intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
1880
1881         if (IS_GEN6(dev))
1882                 snb_wm_latency_quirk(dev);
1883 }
1884
1885 static void skl_setup_wm_latency(struct drm_device *dev)
1886 {
1887         struct drm_i915_private *dev_priv = dev->dev_private;
1888
1889         intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
1890         intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
1891 }
1892
1893 static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
1894                                       struct ilk_pipe_wm_parameters *p)
1895 {
1896         struct drm_device *dev = crtc->dev;
1897         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
1898         enum pipe pipe = intel_crtc->pipe;
1899         struct drm_plane *plane;
1900
1901         if (!intel_crtc_active(crtc))
1902                 return;
1903
1904         p->active = true;
1905         p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
1906         p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
1907         p->pri.bytes_per_pixel = crtc->primary->fb->bits_per_pixel / 8;
1908         p->cur.bytes_per_pixel = 4;
1909         p->pri.horiz_pixels = intel_crtc->config->pipe_src_w;
1910         p->cur.horiz_pixels = intel_crtc->cursor_width;
1911         /* TODO: for now, assume primary and cursor planes are always enabled. */
1912         p->pri.enabled = true;
1913         p->cur.enabled = true;
1914
1915         drm_for_each_legacy_plane(plane, &dev->mode_config.plane_list) {
1916                 struct intel_plane *intel_plane = to_intel_plane(plane);
1917
1918                 if (intel_plane->pipe == pipe) {
1919                         p->spr = intel_plane->wm;
1920                         break;
1921                 }
1922         }
1923 }
1924
1925 static void ilk_compute_wm_config(struct drm_device *dev,
1926                                   struct intel_wm_config *config)
1927 {
1928         struct intel_crtc *intel_crtc;
1929
1930         /* Compute the currently _active_ config */
1931         for_each_intel_crtc(dev, intel_crtc) {
1932                 const struct intel_pipe_wm *wm = &intel_crtc->wm.active;
1933
1934                 if (!wm->pipe_enabled)
1935                         continue;
1936
1937                 config->sprites_enabled |= wm->sprites_enabled;
1938                 config->sprites_scaled |= wm->sprites_scaled;
1939                 config->num_pipes_active++;
1940         }
1941 }
1942
1943 /* Compute new watermarks for the pipe */
1944 static bool intel_compute_pipe_wm(struct drm_crtc *crtc,
1945                                   const struct ilk_pipe_wm_parameters *params,
1946                                   struct intel_pipe_wm *pipe_wm)
1947 {
1948         struct drm_device *dev = crtc->dev;
1949         const struct drm_i915_private *dev_priv = dev->dev_private;
1950         int level, max_level = ilk_wm_max_level(dev);
1951         /* LP0 watermark maximums depend on this pipe alone */
1952         struct intel_wm_config config = {
1953                 .num_pipes_active = 1,
1954                 .sprites_enabled = params->spr.enabled,
1955                 .sprites_scaled = params->spr.scaled,
1956         };
1957         struct ilk_wm_maximums max;
1958
1959         pipe_wm->pipe_enabled = params->active;
1960         pipe_wm->sprites_enabled = params->spr.enabled;
1961         pipe_wm->sprites_scaled = params->spr.scaled;
1962
1963         /* ILK/SNB: LP2+ watermarks only w/o sprites */
1964         if (INTEL_INFO(dev)->gen <= 6 && params->spr.enabled)
1965                 max_level = 1;
1966
1967         /* ILK/SNB/IVB: LP1+ watermarks only w/o scaling */
1968         if (params->spr.scaled)
1969                 max_level = 0;
1970
1971         ilk_compute_wm_level(dev_priv, 0, params, &pipe_wm->wm[0]);
1972
1973         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
1974                 pipe_wm->linetime = hsw_compute_linetime_wm(dev, crtc);
1975
1976         /* LP0 watermarks always use 1/2 DDB partitioning */
1977         ilk_compute_wm_maximums(dev, 0, &config, INTEL_DDB_PART_1_2, &max);
1978
1979         /* At least LP0 must be valid */
1980         if (!ilk_validate_wm_level(0, &max, &pipe_wm->wm[0]))
1981                 return false;
1982
1983         ilk_compute_wm_reg_maximums(dev, 1, &max);
1984
1985         for (level = 1; level <= max_level; level++) {
1986                 struct intel_wm_level wm = {};
1987
1988                 ilk_compute_wm_level(dev_priv, level, params, &wm);
1989
1990                 /*
1991                  * Disable any watermark level that exceeds the
1992                  * register maximums since such watermarks are
1993                  * always invalid.
1994                  */
1995                 if (!ilk_validate_wm_level(level, &max, &wm))
1996                         break;
1997
1998                 pipe_wm->wm[level] = wm;
1999         }
2000
2001         return true;
2002 }
2003
2004 /*
2005  * Merge the watermarks from all active pipes for a specific level.
2006  */
2007 static void ilk_merge_wm_level(struct drm_device *dev,
2008                                int level,
2009                                struct intel_wm_level *ret_wm)
2010 {
2011         const struct intel_crtc *intel_crtc;
2012
2013         ret_wm->enable = true;
2014
2015         for_each_intel_crtc(dev, intel_crtc) {
2016                 const struct intel_pipe_wm *active = &intel_crtc->wm.active;
2017                 const struct intel_wm_level *wm = &active->wm[level];
2018
2019                 if (!active->pipe_enabled)
2020                         continue;
2021
2022                 /*
2023                  * The watermark values may have been used in the past,
2024                  * so we must maintain them in the registers for some
2025                  * time even if the level is now disabled.
2026                  */
2027                 if (!wm->enable)
2028                         ret_wm->enable = false;
2029
2030                 ret_wm->pri_val = max(ret_wm->pri_val, wm->pri_val);
2031                 ret_wm->spr_val = max(ret_wm->spr_val, wm->spr_val);
2032                 ret_wm->cur_val = max(ret_wm->cur_val, wm->cur_val);
2033                 ret_wm->fbc_val = max(ret_wm->fbc_val, wm->fbc_val);
2034         }
2035 }
2036
2037 /*
2038  * Merge all low power watermarks for all active pipes.
2039  */
2040 static void ilk_wm_merge(struct drm_device *dev,
2041                          const struct intel_wm_config *config,
2042                          const struct ilk_wm_maximums *max,
2043                          struct intel_pipe_wm *merged)
2044 {
2045         int level, max_level = ilk_wm_max_level(dev);
2046         int last_enabled_level = max_level;
2047
2048         /* ILK/SNB/IVB: LP1+ watermarks only w/ single pipe */
2049         if ((INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev)) &&
2050             config->num_pipes_active > 1)
2051                 return;
2052
2053         /* ILK: FBC WM must be disabled always */
2054         merged->fbc_wm_enabled = INTEL_INFO(dev)->gen >= 6;
2055
2056         /* merge each WM1+ level */
2057         for (level = 1; level <= max_level; level++) {
2058                 struct intel_wm_level *wm = &merged->wm[level];
2059
2060                 ilk_merge_wm_level(dev, level, wm);
2061
2062                 if (level > last_enabled_level)
2063                         wm->enable = false;
2064                 else if (!ilk_validate_wm_level(level, max, wm))
2065                         /* make sure all following levels get disabled */
2066                         last_enabled_level = level - 1;
2067
2068                 /*
2069                  * The spec says it is preferred to disable
2070                  * FBC WMs instead of disabling a WM level.
2071                  */
2072                 if (wm->fbc_val > max->fbc) {
2073                         if (wm->enable)
2074                                 merged->fbc_wm_enabled = false;
2075                         wm->fbc_val = 0;
2076                 }
2077         }
2078
2079         /* ILK: LP2+ must be disabled when FBC WM is disabled but FBC enabled */
2080         /*
2081          * FIXME this is racy. FBC might get enabled later.
2082          * What we should check here is whether FBC can be
2083          * enabled sometime later.
2084          */
2085         if (IS_GEN5(dev) && !merged->fbc_wm_enabled && intel_fbc_enabled(dev)) {
2086                 for (level = 2; level <= max_level; level++) {
2087                         struct intel_wm_level *wm = &merged->wm[level];
2088
2089                         wm->enable = false;
2090                 }
2091         }
2092 }
2093
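/*
 * For example, on a platform with five WM levels (HSW/BDW) where wm[4] is
 * enabled, LP1/LP2/LP3 map to levels 1/3/4; otherwise they map to 1/2/3,
 * as the comment in the helper below notes.
 */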
2094 static int ilk_wm_lp_to_level(int wm_lp, const struct intel_pipe_wm *pipe_wm)
2095 {
2096         /* LP1,LP2,LP3 levels are either 1,2,3 or 1,3,4 */
2097         return wm_lp + (wm_lp >= 2 && pipe_wm->wm[4].enable);
2098 }
2099
2100 /* The value we need to program into the WM_LPx latency field */
2101 static unsigned int ilk_wm_lp_latency(struct drm_device *dev, int level)
2102 {
2103         struct drm_i915_private *dev_priv = dev->dev_private;
2104
2105         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
2106                 return 2 * level;
2107         else
2108                 return dev_priv->wm.pri_latency[level];
2109 }
2110
2111 static void ilk_compute_wm_results(struct drm_device *dev,
2112                                    const struct intel_pipe_wm *merged,
2113                                    enum intel_ddb_partitioning partitioning,
2114                                    struct ilk_wm_values *results)
2115 {
2116         struct intel_crtc *intel_crtc;
2117         int level, wm_lp;
2118
2119         results->enable_fbc_wm = merged->fbc_wm_enabled;
2120         results->partitioning = partitioning;
2121
2122         /* LP1+ register values */
2123         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2124                 const struct intel_wm_level *r;
2125
2126                 level = ilk_wm_lp_to_level(wm_lp, merged);
2127
2128                 r = &merged->wm[level];
2129
2130                 /*
2131                  * Maintain the watermark values even if the level is
2132                  * disabled. Doing otherwise could cause underruns.
2133                  */
2134                 results->wm_lp[wm_lp - 1] =
2135                         (ilk_wm_lp_latency(dev, level) << WM1_LP_LATENCY_SHIFT) |
2136                         (r->pri_val << WM1_LP_SR_SHIFT) |
2137                         r->cur_val;
2138
2139                 if (r->enable)
2140                         results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
2141
2142                 if (INTEL_INFO(dev)->gen >= 8)
2143                         results->wm_lp[wm_lp - 1] |=
2144                                 r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
2145                 else
2146                         results->wm_lp[wm_lp - 1] |=
2147                                 r->fbc_val << WM1_LP_FBC_SHIFT;
2148
2149                 /*
2150                  * Always set WM1S_LP_EN when spr_val != 0, even if the
2151                  * level is disabled. Doing otherwise could cause underruns.
2152                  */
2153                 if (INTEL_INFO(dev)->gen <= 6 && r->spr_val) {
2154                         WARN_ON(wm_lp != 1);
2155                         results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
2156                 } else
2157                         results->wm_lp_spr[wm_lp - 1] = r->spr_val;
2158         }
2159
2160         /* LP0 register values */
2161         for_each_intel_crtc(dev, intel_crtc) {
2162                 enum pipe pipe = intel_crtc->pipe;
2163                 const struct intel_wm_level *r =
2164                         &intel_crtc->wm.active.wm[0];
2165
2166                 if (WARN_ON(!r->enable))
2167                         continue;
2168
2169                 results->wm_linetime[pipe] = intel_crtc->wm.active.linetime;
2170
2171                 results->wm_pipe[pipe] =
2172                         (r->pri_val << WM0_PIPE_PLANE_SHIFT) |
2173                         (r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
2174                         r->cur_val;
2175         }
2176 }
2177
2178 /* Find the result with the highest level enabled. If both reach the same level,
2179  * prefer the one with FBC WM enabled; if that is equal as well, prefer r1. */
2180 static struct intel_pipe_wm *ilk_find_best_result(struct drm_device *dev,
2181                                                   struct intel_pipe_wm *r1,
2182                                                   struct intel_pipe_wm *r2)
2183 {
2184         int level, max_level = ilk_wm_max_level(dev);
2185         int level1 = 0, level2 = 0;
2186
2187         for (level = 1; level <= max_level; level++) {
2188                 if (r1->wm[level].enable)
2189                         level1 = level;
2190                 if (r2->wm[level].enable)
2191                         level2 = level;
2192         }
2193
2194         if (level1 == level2) {
2195                 if (r2->fbc_wm_enabled && !r1->fbc_wm_enabled)
2196                         return r2;
2197                 else
2198                         return r1;
2199         } else if (level1 > level2) {
2200                 return r1;
2201         } else {
2202                 return r2;
2203         }
2204 }
2205
2206 /* dirty bits used to track which watermarks need changes */
2207 #define WM_DIRTY_PIPE(pipe) (1 << (pipe))
2208 #define WM_DIRTY_LINETIME(pipe) (1 << (8 + (pipe)))
2209 #define WM_DIRTY_LP(wm_lp) (1 << (15 + (wm_lp)))
2210 #define WM_DIRTY_LP_ALL (WM_DIRTY_LP(1) | WM_DIRTY_LP(2) | WM_DIRTY_LP(3))
2211 #define WM_DIRTY_FBC (1 << 24)
2212 #define WM_DIRTY_DDB (1 << 25)
2213
2214 static unsigned int ilk_compute_wm_dirty(struct drm_i915_private *dev_priv,
2215                                          const struct ilk_wm_values *old,
2216                                          const struct ilk_wm_values *new)
2217 {
2218         unsigned int dirty = 0;
2219         enum pipe pipe;
2220         int wm_lp;
2221
2222         for_each_pipe(dev_priv, pipe) {
2223                 if (old->wm_linetime[pipe] != new->wm_linetime[pipe]) {
2224                         dirty |= WM_DIRTY_LINETIME(pipe);
2225                         /* Must disable LP1+ watermarks too */
2226                         dirty |= WM_DIRTY_LP_ALL;
2227                 }
2228
2229                 if (old->wm_pipe[pipe] != new->wm_pipe[pipe]) {
2230                         dirty |= WM_DIRTY_PIPE(pipe);
2231                         /* Must disable LP1+ watermarks too */
2232                         dirty |= WM_DIRTY_LP_ALL;
2233                 }
2234         }
2235
2236         if (old->enable_fbc_wm != new->enable_fbc_wm) {
2237                 dirty |= WM_DIRTY_FBC;
2238                 /* Must disable LP1+ watermarks too */
2239                 dirty |= WM_DIRTY_LP_ALL;
2240         }
2241
2242         if (old->partitioning != new->partitioning) {
2243                 dirty |= WM_DIRTY_DDB;
2244                 /* Must disable LP1+ watermarks too */
2245                 dirty |= WM_DIRTY_LP_ALL;
2246         }
2247
2248         /* LP1+ watermarks already deemed dirty, no need to continue */
2249         if (dirty & WM_DIRTY_LP_ALL)
2250                 return dirty;
2251
2252         /* Find the lowest numbered LP1+ watermark in need of an update... */
2253         for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
2254                 if (old->wm_lp[wm_lp - 1] != new->wm_lp[wm_lp - 1] ||
2255                     old->wm_lp_spr[wm_lp - 1] != new->wm_lp_spr[wm_lp - 1])
2256                         break;
2257         }
2258
2259         /* ...and mark it and all higher numbered LP1+ watermarks as dirty */
2260         for (; wm_lp <= 3; wm_lp++)
2261                 dirty |= WM_DIRTY_LP(wm_lp);
2262
2263         return dirty;
2264 }
2265
2266 static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
2267                                unsigned int dirty)
2268 {
2269         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2270         bool changed = false;
2271
2272         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
2273                 previous->wm_lp[2] &= ~WM1_LP_SR_EN;
2274                 I915_WRITE(WM3_LP_ILK, previous->wm_lp[2]);
2275                 changed = true;
2276         }
2277         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
2278                 previous->wm_lp[1] &= ~WM1_LP_SR_EN;
2279                 I915_WRITE(WM2_LP_ILK, previous->wm_lp[1]);
2280                 changed = true;
2281         }
2282         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
2283                 previous->wm_lp[0] &= ~WM1_LP_SR_EN;
2284                 I915_WRITE(WM1_LP_ILK, previous->wm_lp[0]);
2285                 changed = true;
2286         }
2287
2288         /*
2289          * Don't touch WM1S_LP_EN here.
2290          * Doing so could cause underruns.
2291          */
2292
2293         return changed;
2294 }
2295
2296 /*
2297  * The spec says we shouldn't write when we don't need to, because every write
2298  * causes WMs to be re-evaluated, expending some power.
2299  */
2300 static void ilk_write_wm_values(struct drm_i915_private *dev_priv,
2301                                 struct ilk_wm_values *results)
2302 {
2303         struct drm_device *dev = dev_priv->dev;
2304         struct ilk_wm_values *previous = &dev_priv->wm.hw;
2305         unsigned int dirty;
2306         uint32_t val;
2307
2308         dirty = ilk_compute_wm_dirty(dev_priv, previous, results);
2309         if (!dirty)
2310                 return;
2311
2312         _ilk_disable_lp_wm(dev_priv, dirty);
2313
2314         if (dirty & WM_DIRTY_PIPE(PIPE_A))
2315                 I915_WRITE(WM0_PIPEA_ILK, results->wm_pipe[0]);
2316         if (dirty & WM_DIRTY_PIPE(PIPE_B))
2317                 I915_WRITE(WM0_PIPEB_ILK, results->wm_pipe[1]);
2318         if (dirty & WM_DIRTY_PIPE(PIPE_C))
2319                 I915_WRITE(WM0_PIPEC_IVB, results->wm_pipe[2]);
2320
2321         if (dirty & WM_DIRTY_LINETIME(PIPE_A))
2322                 I915_WRITE(PIPE_WM_LINETIME(PIPE_A), results->wm_linetime[0]);
2323         if (dirty & WM_DIRTY_LINETIME(PIPE_B))
2324                 I915_WRITE(PIPE_WM_LINETIME(PIPE_B), results->wm_linetime[1]);
2325         if (dirty & WM_DIRTY_LINETIME(PIPE_C))
2326                 I915_WRITE(PIPE_WM_LINETIME(PIPE_C), results->wm_linetime[2]);
2327
2328         if (dirty & WM_DIRTY_DDB) {
2329                 if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
2330                         val = I915_READ(WM_MISC);
2331                         if (results->partitioning == INTEL_DDB_PART_1_2)
2332                                 val &= ~WM_MISC_DATA_PARTITION_5_6;
2333                         else
2334                                 val |= WM_MISC_DATA_PARTITION_5_6;
2335                         I915_WRITE(WM_MISC, val);
2336                 } else {
2337                         val = I915_READ(DISP_ARB_CTL2);
2338                         if (results->partitioning == INTEL_DDB_PART_1_2)
2339                                 val &= ~DISP_DATA_PARTITION_5_6;
2340                         else
2341                                 val |= DISP_DATA_PARTITION_5_6;
2342                         I915_WRITE(DISP_ARB_CTL2, val);
2343                 }
2344         }
2345
2346         if (dirty & WM_DIRTY_FBC) {
2347                 val = I915_READ(DISP_ARB_CTL);
2348                 if (results->enable_fbc_wm)
2349                         val &= ~DISP_FBC_WM_DIS;
2350                 else
2351                         val |= DISP_FBC_WM_DIS;
2352                 I915_WRITE(DISP_ARB_CTL, val);
2353         }
2354
2355         if (dirty & WM_DIRTY_LP(1) &&
2356             previous->wm_lp_spr[0] != results->wm_lp_spr[0])
2357                 I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
2358
2359         if (INTEL_INFO(dev)->gen >= 7) {
2360                 if (dirty & WM_DIRTY_LP(2) && previous->wm_lp_spr[1] != results->wm_lp_spr[1])
2361                         I915_WRITE(WM2S_LP_IVB, results->wm_lp_spr[1]);
2362                 if (dirty & WM_DIRTY_LP(3) && previous->wm_lp_spr[2] != results->wm_lp_spr[2])
2363                         I915_WRITE(WM3S_LP_IVB, results->wm_lp_spr[2]);
2364         }
2365
2366         if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] != results->wm_lp[0])
2367                 I915_WRITE(WM1_LP_ILK, results->wm_lp[0]);
2368         if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] != results->wm_lp[1])
2369                 I915_WRITE(WM2_LP_ILK, results->wm_lp[1]);
2370         if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] != results->wm_lp[2])
2371                 I915_WRITE(WM3_LP_ILK, results->wm_lp[2]);
2372
2373         dev_priv->wm.hw = *results;
2374 }
2375
2376 static bool ilk_disable_lp_wm(struct drm_device *dev)
2377 {
2378         struct drm_i915_private *dev_priv = dev->dev_private;
2379
2380         return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
2381 }
2382
2383 /*
2384  * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
2385  * different active planes.
2386  */
2387
2388 #define SKL_DDB_SIZE            896     /* in blocks */
2389
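/*
 * Illustration of the split done below: with 896 blocks total, 4 reserved
 * for the bypass path and two active pipes, each pipe receives an equal
 * 892 / 2 = 446 block slice; the first active pipe gets [0, 446) and the
 * second [446, 892).
 */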
2390 static void
2391 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
2392                                    struct drm_crtc *for_crtc,
2393                                    const struct intel_wm_config *config,
2394                                    const struct skl_pipe_wm_parameters *params,
2395                                    struct skl_ddb_entry *alloc /* out */)
2396 {
2397         struct drm_crtc *crtc;
2398         unsigned int pipe_size, ddb_size;
2399         int nth_active_pipe;
2400
2401         if (!params->active) {
2402                 alloc->start = 0;
2403                 alloc->end = 0;
2404                 return;
2405         }
2406
2407         ddb_size = SKL_DDB_SIZE;
2408
2409         ddb_size -= 4; /* 4 blocks for bypass path allocation */
2410
2411         nth_active_pipe = 0;
2412         for_each_crtc(dev, crtc) {
2413                 if (!intel_crtc_active(crtc))
2414                         continue;
2415
2416                 if (crtc == for_crtc)
2417                         break;
2418
2419                 nth_active_pipe++;
2420         }
2421
2422         pipe_size = ddb_size / config->num_pipes_active;
2423         alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
2424         alloc->end = alloc->start + pipe_size;
2425 }
2426
2427 static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
2428 {
2429         if (config->num_pipes_active == 1)
2430                 return 32;
2431
2432         return 8;
2433 }
2434
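/*
 * The register packs the start block in bits 9:0 and the end block in
 * bits 25:16; the end appears to be stored inclusively, hence the +1
 * below to convert it to the exclusive form used elsewhere.
 */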
2435 static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
2436 {
2437         entry->start = reg & 0x3ff;
2438         entry->end = (reg >> 16) & 0x3ff;
2439         if (entry->end)
2440                 entry->end += 1;
2441 }
2442
2443 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
2444                           struct skl_ddb_allocation *ddb /* out */)
2445 {
2446         struct drm_device *dev = dev_priv->dev;
2447         enum pipe pipe;
2448         int plane;
2449         u32 val;
2450
2451         for_each_pipe(dev_priv, pipe) {
2452                 for_each_plane(pipe, plane) {
2453                         val = I915_READ(PLANE_BUF_CFG(pipe, plane));
2454                         skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
2455                                                    val);
2456                 }
2457
2458                 val = I915_READ(CUR_BUF_CFG(pipe));
2459                 skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val);
2460         }
2461 }
2462
2463 static unsigned int
2464 skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
2465 {
2466         return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
2467 }
2468
2469 /*
2470  * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
2471  * an 8192x4096@32bpp framebuffer:
2472  *   3 * 4096 * 8192  * 4 < 2^32
2473  */
2474 static unsigned int
2475 skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
2476                                  const struct skl_pipe_wm_parameters *params)
2477 {
2478         unsigned int total_data_rate = 0;
2479         int plane;
2480
2481         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2482                 const struct intel_plane_wm_parameters *p;
2483
2484                 p = &params->plane[plane];
2485                 if (!p->enabled)
2486                         continue;
2487
2488                 total_data_rate += skl_plane_relative_data_rate(p);
2489         }
2490
2491         return total_data_rate;
2492 }
2493
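/*
 * Sketch of the allocation below, with made-up numbers: if a pipe owns
 * 446 blocks while two pipes are active, 8 blocks are carved off the end
 * for the cursor and the remaining 438 are divided among the planes in
 * proportion to their relative data rates, e.g. a plane responsible for
 * 2/3 of the pipe's data rate receives 438 * 2 / 3 = 292 blocks.
 */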
2494 static void
2495 skl_allocate_pipe_ddb(struct drm_crtc *crtc,
2496                       const struct intel_wm_config *config,
2497                       const struct skl_pipe_wm_parameters *params,
2498                       struct skl_ddb_allocation *ddb /* out */)
2499 {
2500         struct drm_device *dev = crtc->dev;
2501         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2502         enum pipe pipe = intel_crtc->pipe;
2503         struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
2504         uint16_t alloc_size, start, cursor_blocks;
2505         unsigned int total_data_rate;
2506         int plane;
2507
2508         skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
2509         alloc_size = skl_ddb_entry_size(alloc);
2510         if (alloc_size == 0) {
2511                 memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
2512                 memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe]));
2513                 return;
2514         }
2515
2516         cursor_blocks = skl_cursor_allocation(config);
2517         ddb->cursor[pipe].start = alloc->end - cursor_blocks;
2518         ddb->cursor[pipe].end = alloc->end;
2519
2520         alloc_size -= cursor_blocks;
2521         alloc->end -= cursor_blocks;
2522
2523         /*
2524          * Each active plane gets a portion of the remaining space, in
2525          * proportion to the amount of data they need to fetch from memory.
2526          *
2527          * FIXME: we may not allocate every single block here.
2528          */
2529         total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
2530
2531         start = alloc->start;
2532         for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
2533                 const struct intel_plane_wm_parameters *p;
2534                 unsigned int data_rate;
2535                 uint16_t plane_blocks;
2536
2537                 p = &params->plane[plane];
2538                 if (!p->enabled)
2539                         continue;
2540
2541                 data_rate = skl_plane_relative_data_rate(p);
2542
2543                 /*
2544                  * Promote the expression to 64 bits to avoid overflowing; the
2545                  * result is less than alloc_size since data_rate / total_data_rate < 1.
2546                  */
2547                 plane_blocks = div_u64((uint64_t)alloc_size * data_rate,
2548                                        total_data_rate);
2549
2550                 ddb->plane[pipe][plane].start = start;
2551                 ddb->plane[pipe][plane].end = start + plane_blocks;
2552
2553                 start += plane_blocks;
2554         }
2555
2556 }
2557
2558 static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
2559 {
2560         /* TODO: Take into account the scalers once we support them */
2561         return config->base.adjusted_mode.crtc_clock;
2562 }
2563
2564 /*
2565  * The max latency should be 257 (the max the punit can encode is 255 and we add 2us
2566  * for the read latency) and bytes_per_pixel should always be <= 8, so that
2567  * should allow pixel_rate up to ~2 GHz which seems sufficient since max
2568  * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
2569  */
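/*
 * Hypothetical example (values not from this file): a 4us latency, a
 * 148500 kHz pixel clock and 4 bytes per pixel give
 * 4 * 148500 * 4 / 1000 = 2376 bytes fetched during the latency; the
 * caller later converts that byte count into 512 byte DDB blocks.
 */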
2570 static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
2571                                uint32_t latency)
2572 {
2573         uint32_t wm_intermediate_val, ret;
2574
2575         if (latency == 0)
2576                 return UINT_MAX;
2577
2578         wm_intermediate_val = latency * pixel_rate * bytes_per_pixel;
2579         ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
2580
2581         return ret;
2582 }
2583
2584 static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
2585                                uint32_t horiz_pixels, uint8_t bytes_per_pixel,
2586                                uint32_t latency)
2587 {
2588         uint32_t ret, plane_bytes_per_line, wm_intermediate_val;
2589
2590         if (latency == 0)
2591                 return UINT_MAX;
2592
2593         plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
2594         wm_intermediate_val = latency * pixel_rate;
2595         ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
2596                                 plane_bytes_per_line;
2597
2598         return ret;
2599 }
2600
2601 static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
2602                                        const struct intel_crtc *intel_crtc)
2603 {
2604         struct drm_device *dev = intel_crtc->base.dev;
2605         struct drm_i915_private *dev_priv = dev->dev_private;
2606         const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
2607         enum pipe pipe = intel_crtc->pipe;
2608
2609         if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
2610                    sizeof(new_ddb->plane[pipe])))
2611                 return true;
2612
2613         if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
2614                     sizeof(new_ddb->cursor[pipe])))
2615                 return true;
2616
2617         return false;
2618 }
2619
2620 static void skl_compute_wm_global_parameters(struct drm_device *dev,
2621                                              struct intel_wm_config *config)
2622 {
2623         struct drm_crtc *crtc;
2624         struct drm_plane *plane;
2625
2626         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
2627                 config->num_pipes_active += intel_crtc_active(crtc);
2628
2629         /* FIXME: I don't think we need those two global parameters on SKL */
2630         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2631                 struct intel_plane *intel_plane = to_intel_plane(plane);
2632
2633                 config->sprites_enabled |= intel_plane->wm.enabled;
2634                 config->sprites_scaled |= intel_plane->wm.scaled;
2635         }
2636 }
2637
2638 static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
2639                                            struct skl_pipe_wm_parameters *p)
2640 {
2641         struct drm_device *dev = crtc->dev;
2642         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2643         enum pipe pipe = intel_crtc->pipe;
2644         struct drm_plane *plane;
2645         int i = 1; /* sprite plane indices start at 1 */
2646
2647         p->active = intel_crtc_active(crtc);
2648         if (p->active) {
2649                 p->pipe_htotal = intel_crtc->config->base.adjusted_mode.crtc_htotal;
2650                 p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
2651
2652                 /*
2653                  * For now, assume primary and cursor planes are always enabled.
2654                  */
2655                 p->plane[0].enabled = true;
2656                 p->plane[0].bytes_per_pixel =
2657                         crtc->primary->fb->bits_per_pixel / 8;
2658                 p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
2659                 p->plane[0].vert_pixels = intel_crtc->config->pipe_src_h;
2660
2661                 p->cursor.enabled = true;
2662                 p->cursor.bytes_per_pixel = 4;
2663                 p->cursor.horiz_pixels = intel_crtc->cursor_width ?
2664                                          intel_crtc->cursor_width : 64;
2665         }
2666
2667         list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
2668                 struct intel_plane *intel_plane = to_intel_plane(plane);
2669
2670                 if (intel_plane->pipe == pipe &&
2671                         plane->type == DRM_PLANE_TYPE_OVERLAY)
2672                         p->plane[i++] = intel_plane->wm;
2673         }
2674 }
2675
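/*
 * Rough outline of the check below, with illustrative numbers: a result of
 * 2376 bytes becomes DIV_ROUND_UP(2376, 512) + 1 = 6 DDB blocks and, for a
 * 1920 pixel wide 4 Bpp plane, DIV_ROUND_UP(2376, 7680) = 1 line; the
 * level is only reported as usable when the blocks fit within the plane's
 * DDB allocation and the line count stays below 32.
 */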
2676 static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p,
2677                                  struct intel_plane_wm_parameters *p_params,
2678                                  uint16_t ddb_allocation,
2679                                  uint32_t mem_value,
2680                                  uint16_t *out_blocks, /* out */
2681                                  uint8_t *out_lines /* out */)
2682 {
2683         uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines;
2684         uint32_t result_bytes;
2685
2686         if (mem_value == 0 || !p->active || !p_params->enabled)
2687                 return false;
2688
2689         method1 = skl_wm_method1(p->pixel_rate,
2690                                  p_params->bytes_per_pixel,
2691                                  mem_value);
2692         method2 = skl_wm_method2(p->pixel_rate,
2693                                  p->pipe_htotal,
2694                                  p_params->horiz_pixels,
2695                                  p_params->bytes_per_pixel,
2696                                  mem_value);
2697
2698         plane_bytes_per_line = p_params->horiz_pixels *
2699                                         p_params->bytes_per_pixel;
2700
2701         /* For now, X-tiled and linear only */
2702         if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1)
2703                 result_bytes = min(method1, method2);
2704         else
2705                 result_bytes = method1;
2706
2707         res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1;
2708         res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line);
2709
2710         if (res_blocks > ddb_allocation || res_lines > 31)
2711                 return false;
2712
2713         *out_blocks = res_blocks;
2714         *out_lines = res_lines;
2715
2716         return true;
2717 }
2718
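/*
 * Fill in one watermark level for a pipe: compute the watermark of every
 * plane and of the cursor, using the DDB space allocated to each of them and
 * the latency associated with this level.
 */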
2719 static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
2720                                  struct skl_ddb_allocation *ddb,
2721                                  struct skl_pipe_wm_parameters *p,
2722                                  enum pipe pipe,
2723                                  int level,
2724                                  int num_planes,
2725                                  struct skl_wm_level *result)
2726 {
2727         uint16_t latency = dev_priv->wm.skl_latency[level];
2728         uint16_t ddb_blocks;
2729         int i;
2730
2731         for (i = 0; i < num_planes; i++) {
2732                 ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
2733
2734                 result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i],
2735                                                 ddb_blocks,
2736                                                 latency,
2737                                                 &result->plane_res_b[i],
2738                                                 &result->plane_res_l[i]);
2739         }
2740
2741         ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
2742         result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks,
2743                                                  latency, &result->cursor_res_b,
2744                                                  &result->cursor_res_l);
2745 }
2746
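/*
 * Line time watermark for the pipe.  Assuming the pixel rate is in kHz (as
 * elsewhere in this file), 8 * htotal * 1000 / pixel_rate yields the line
 * time in 0.125us units; inactive pipes report 0.
 */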
2747 static uint32_t
2748 skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
2749 {
2750         if (!intel_crtc_active(crtc))
2751                 return 0;
2752
2753         return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
2755 }
2756
2757 static void skl_compute_transition_wm(struct drm_crtc *crtc,
2758                                       struct skl_pipe_wm_parameters *params,
2759                                       struct skl_wm_level *trans_wm /* out */)
2760 {
2761         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2762         int i;
2763
2764         if (!params->active)
2765                 return;
2766
2767         /* Until we know more, just disable transition WMs */
2768         for (i = 0; i < intel_num_planes(intel_crtc); i++)
2769                 trans_wm->plane_en[i] = false;
2770         trans_wm->cursor_en = false;
2771 }
2772
2773 static void skl_compute_pipe_wm(struct drm_crtc *crtc,
2774                                 struct skl_ddb_allocation *ddb,
2775                                 struct skl_pipe_wm_parameters *params,
2776                                 struct skl_pipe_wm *pipe_wm)
2777 {
2778         struct drm_device *dev = crtc->dev;
2779         const struct drm_i915_private *dev_priv = dev->dev_private;
2780         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
2781         int level, max_level = ilk_wm_max_level(dev);
2782
2783         for (level = 0; level <= max_level; level++) {
2784                 skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
2785                                      level, intel_num_planes(intel_crtc),
2786                                      &pipe_wm->wm[level]);
2787         }
2788         pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
2789
2790         skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
2791 }
2792
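/*
 * Pack the computed watermark levels, transition watermarks and line time of
 * one pipe into the register layout (blocks in the low bits, lines shifted by
 * PLANE_WM_LINES_SHIFT, plus the enable bit), ready to be written out.
 */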
2793 static void skl_compute_wm_results(struct drm_device *dev,
2794                                    struct skl_pipe_wm_parameters *p,
2795                                    struct skl_pipe_wm *p_wm,
2796                                    struct skl_wm_values *r,
2797                                    struct intel_crtc *intel_crtc)
2798 {
2799         int level, max_level = ilk_wm_max_level(dev);
2800         enum pipe pipe = intel_crtc->pipe;
2801         uint32_t temp;
2802         int i;
2803
2804         for (level = 0; level <= max_level; level++) {
2805                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
2806                         temp = 0;
2807
2808                         temp |= p_wm->wm[level].plane_res_l[i] <<
2809                                         PLANE_WM_LINES_SHIFT;
2810                         temp |= p_wm->wm[level].plane_res_b[i];
2811                         if (p_wm->wm[level].plane_en[i])
2812                                 temp |= PLANE_WM_EN;
2813
2814                         r->plane[pipe][i][level] = temp;
2815                 }
2816
2817                 temp = 0;
2818
2819                 temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
2820                 temp |= p_wm->wm[level].cursor_res_b;
2821
2822                 if (p_wm->wm[level].cursor_en)
2823                         temp |= PLANE_WM_EN;
2824
2825                 r->cursor[pipe][level] = temp;
2826
2827         }
2828
2829         /* transition WMs */
2830         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
2831                 temp = 0;
2832                 temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
2833                 temp |= p_wm->trans_wm.plane_res_b[i];
2834                 if (p_wm->trans_wm.plane_en[i])
2835                         temp |= PLANE_WM_EN;
2836
2837                 r->plane_trans[pipe][i] = temp;
2838         }
2839
2840         temp = 0;
2841         temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT;
2842         temp |= p_wm->trans_wm.cursor_res_b;
2843         if (p_wm->trans_wm.cursor_en)
2844                 temp |= PLANE_WM_EN;
2845
2846         r->cursor_trans[pipe] = temp;
2847
2848         r->wm_linetime[pipe] = p_wm->linetime;
2849 }
2850
2851 static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
2852                                 const struct skl_ddb_entry *entry)
2853 {
2854         if (entry->end)
2855                 I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
2856         else
2857                 I915_WRITE(reg, 0);
2858 }
2859
2860 static void skl_write_wm_values(struct drm_i915_private *dev_priv,
2861                                 const struct skl_wm_values *new)
2862 {
2863         struct drm_device *dev = dev_priv->dev;
2864         struct intel_crtc *crtc;
2865
2866         list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
2867                 int i, level, max_level = ilk_wm_max_level(dev);
2868                 enum pipe pipe = crtc->pipe;
2869
2870                 if (!new->dirty[pipe])
2871                         continue;
2872
2873                 I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
2874
2875                 for (level = 0; level <= max_level; level++) {
2876                         for (i = 0; i < intel_num_planes(crtc); i++)
2877                                 I915_WRITE(PLANE_WM(pipe, i, level),
2878                                            new->plane[pipe][i][level]);
2879                         I915_WRITE(CUR_WM(pipe, level),
2880                                    new->cursor[pipe][level]);
2881                 }
2882                 for (i = 0; i < intel_num_planes(crtc); i++)
2883                         I915_WRITE(PLANE_WM_TRANS(pipe, i),
2884                                    new->plane_trans[pipe][i]);
2885                 I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
2886
2887                 for (i = 0; i < intel_num_planes(crtc); i++)
2888                         skl_ddb_entry_write(dev_priv,
2889                                             PLANE_BUF_CFG(pipe, i),
2890                                             &new->ddb.plane[pipe][i]);
2891
2892                 skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
2893                                     &new->ddb.cursor[pipe]);
2894         }
2895 }
2896
2897 /*
2898  * When setting up a new DDB allocation arrangement, we need to correctly
2899  * sequence the times at which the new allocations for the pipes are taken into
2900  * account or we'll have pipes fetching from space previously allocated to
2901  * another pipe.
2902  *
2903  * Roughly the sequence looks like:
2904  *  1. re-allocate the pipe(s) with the allocation being reduced and not
2905  *     overlapping with a previous light-up pipe (another way to put it is:
2906  *     pipes with their new allocation strictly included in their old ones).
2907  *  2. re-allocate the other pipes that get their allocation reduced
2908  *  3. allocate the pipes having their allocation increased
2909  *
2910  * Steps 1. and 2. are here to take care of the following case:
2911  * - Initially DDB looks like this:
2912  *     |   B    |   C    |
2913  * - enable pipe A.
2914  * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
2915  *   allocation
2916  *     |  A  |  B  |  C  |
2917  *
2918  * We need to sequence the re-allocation: C, B, A (and not B, C, A).
2919  */
2920
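/*
 * Re-arm the plane and cursor surface address registers by writing them back
 * to themselves, so that the updated DDB/WM values are latched on the pipe's
 * next vblank.
 */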
2921 static void
2922 skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)
2923 {
2924         struct drm_device *dev = dev_priv->dev;
2925         int plane;
2926
2927         DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
2928
2929         for_each_plane(pipe, plane) {
2930                 I915_WRITE(PLANE_SURF(pipe, plane),
2931                            I915_READ(PLANE_SURF(pipe, plane)));
2932         }
2933         I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
2934 }
2935
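/*
 * Return true if the new DDB allocation for this pipe is strictly contained
 * within its old allocation (and has actually changed), i.e. the pipe can be
 * flushed in the first pass without fetching from space that still belongs to
 * another pipe.
 */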
2936 static bool
2937 skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
2938                             const struct skl_ddb_allocation *new,
2939                             enum pipe pipe)
2940 {
2941         uint16_t old_size, new_size;
2942
2943         old_size = skl_ddb_entry_size(&old->pipe[pipe]);
2944         new_size = skl_ddb_entry_size(&new->pipe[pipe]);
2945
2946         return old_size != new_size &&
2947                new->pipe[pipe].start >= old->pipe[pipe].start &&
2948                new->pipe[pipe].end <= old->pipe[pipe].end;
2949 }
2950
2951 static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
2952                                 struct skl_wm_values *new_values)
2953 {
2954         struct drm_device *dev = dev_priv->dev;
2955         struct skl_ddb_allocation *cur_ddb, *new_ddb;
2956         bool reallocated[I915_MAX_PIPES] = {false, false, false};
2957         struct intel_crtc *crtc;
2958         enum pipe pipe;
2959
2960         new_ddb = &new_values->ddb;
2961         cur_ddb = &dev_priv->wm.skl_hw.ddb;
2962
2963         /*
2964          * First pass: flush the pipes with the new allocation contained into
2965          * the old space.
2966          *
2967          * We'll wait for the vblank on those pipes to ensure we can safely
2968          * re-allocate the freed space without this pipe fetching from it.
2969          */
2970         for_each_intel_crtc(dev, crtc) {
2971                 if (!crtc->active)
2972                         continue;
2973
2974                 pipe = crtc->pipe;
2975
2976                 if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
2977                         continue;
2978
2979                 skl_wm_flush_pipe(dev_priv, pipe, 1);
2980                 intel_wait_for_vblank(dev, pipe);
2981
2982                 reallocated[pipe] = true;
2983         }
2984
2986         /*
2987          * Second pass: flush the pipes that are having their allocation
2988          * reduced, but overlapping with a previous allocation.
2989          *
2990          * Here as well we need to wait for the vblank to make sure the freed
2991          * space is not used anymore.
2992          */
2993         for_each_intel_crtc(dev, crtc) {
2994                 if (!crtc->active)
2995                         continue;
2996
2997                 pipe = crtc->pipe;
2998
2999                 if (reallocated[pipe])
3000                         continue;
3001
3002                 if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
3003                     skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
3004                         skl_wm_flush_pipe(dev_priv, pipe, 2);
3005                         intel_wait_for_vblank(dev, pipe);
3006                         reallocated[pipe] = true;
3007                 }
3008         }
3009
3010         /*
3011          * Third pass: flush the pipes that got more space allocated.
3012          *
3013          * We don't need to actively wait for the update here, next vblank
3014          * will just get more DDB space with the correct WM values.
3015          */
3016         for_each_intel_crtc(dev, crtc) {
3017                 if (!crtc->active)
3018                         continue;
3019
3020                 pipe = crtc->pipe;
3021
3022                 /*
3023          * At this point, only the pipes that got more space than before
3024          * are left to re-allocate.
3025                  */
3026                 if (reallocated[pipe])
3027                         continue;
3028
3029                 skl_wm_flush_pipe(dev_priv, pipe, 3);
3030         }
3031 }
3032
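/*
 * Compute the per-pipe parameters, DDB allocation and watermarks for one
 * CRTC.  Returns true if the resulting watermarks differ from the currently
 * active ones (in which case skl_active is updated), false otherwise.
 */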
3033 static bool skl_update_pipe_wm(struct drm_crtc *crtc,
3034                                struct skl_pipe_wm_parameters *params,
3035                                struct intel_wm_config *config,
3036                                struct skl_ddb_allocation *ddb, /* out */
3037                                struct skl_pipe_wm *pipe_wm /* out */)
3038 {
3039         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3040
3041         skl_compute_wm_pipe_parameters(crtc, params);
3042         skl_allocate_pipe_ddb(crtc, config, params, ddb);
3043         skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
3044
3045         if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
3046                 return false;
3047
3048         intel_crtc->wm.skl_active = *pipe_wm;
3049         return true;
3050 }
3051
3052 static void skl_update_other_pipe_wm(struct drm_device *dev,
3053                                      struct drm_crtc *crtc,
3054                                      struct intel_wm_config *config,
3055                                      struct skl_wm_values *r)
3056 {
3057         struct intel_crtc *intel_crtc;
3058         struct intel_crtc *this_crtc = to_intel_crtc(crtc);
3059
3060         /*
3061          * If the WM update hasn't changed the allocation for this_crtc (the
3062          * crtc we are currently computing the new WM values for), other
3063          * enabled crtcs will keep the same allocation and we don't need to
3064          * recompute anything for them.
3065          */
3066         if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
3067                 return;
3068
3069         /*
3070          * Otherwise, because of this_crtc being freshly enabled/disabled, the
3071          * other active pipes need new DDB allocation and WM values.
3072          */
3073         list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
3074                                 base.head) {
3075                 struct skl_pipe_wm_parameters params = {};
3076                 struct skl_pipe_wm pipe_wm = {};
3077                 bool wm_changed;
3078
3079                 if (this_crtc->pipe == intel_crtc->pipe)
3080                         continue;
3081
3082                 if (!intel_crtc->active)
3083                         continue;
3084
3085                 wm_changed = skl_update_pipe_wm(&intel_crtc->base,
3086                                                 &params, config,
3087                                                 &r->ddb, &pipe_wm);
3088
3089                 /*
3090                  * If we end up re-computing the other pipe WM values, it's
3091                  * because it was really needed, so we expect the WM values to
3092                  * be different.
3093                  */
3094                 WARN_ON(!wm_changed);
3095
3096                 skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
3097                 r->dirty[intel_crtc->pipe] = true;
3098         }
3099 }
3100
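/*
 * Top-level SKL watermark update: recompute the global config and this CRTC's
 * DDB/WM values, recompute any other active pipes whose allocation changed as
 * a result, then write and flush the new values in a safe order before
 * storing them as the current hardware state.
 */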
3101 static void skl_update_wm(struct drm_crtc *crtc)
3102 {
3103         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3104         struct drm_device *dev = crtc->dev;
3105         struct drm_i915_private *dev_priv = dev->dev_private;
3106         struct skl_pipe_wm_parameters params = {};
3107         struct skl_wm_values *results = &dev_priv->wm.skl_results;
3108         struct skl_pipe_wm pipe_wm = {};
3109         struct intel_wm_config config = {};
3110
3111         memset(results, 0, sizeof(*results));
3112
3113         skl_compute_wm_global_parameters(dev, &config);
3114
3115         if (!skl_update_pipe_wm(crtc, &params, &config,
3116                                 &results->ddb, &pipe_wm))
3117                 return;
3118
3119         skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
3120         results->dirty[intel_crtc->pipe] = true;
3121
3122         skl_update_other_pipe_wm(dev, crtc, &config, results);
3123         skl_write_wm_values(dev_priv, results);
3124         skl_flush_wm_values(dev_priv, results);
3125
3126         /* store the new configuration */
3127         dev_priv->wm.skl_hw = *results;
3128 }
3129
3130 static void
3131 skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
3132                      uint32_t sprite_width, uint32_t sprite_height,
3133                      int pixel_size, bool enabled, bool scaled)
3134 {
3135         struct intel_plane *intel_plane = to_intel_plane(plane);
3136
3137         intel_plane->wm.enabled = enabled;
3138         intel_plane->wm.scaled = scaled;
3139         intel_plane->wm.horiz_pixels = sprite_width;
3140         intel_plane->wm.vert_pixels = sprite_height;
3141         intel_plane->wm.bytes_per_pixel = pixel_size;
3142
3143         skl_update_wm(crtc);
3144 }
3145
3146 static void ilk_update_wm(struct drm_crtc *crtc)
3147 {
3148         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3149         struct drm_device *dev = crtc->dev;
3150         struct drm_i915_private *dev_priv = dev->dev_private;
3151         struct ilk_wm_maximums max;
3152         struct ilk_pipe_wm_parameters params = {};
3153         struct ilk_wm_values results = {};
3154         enum intel_ddb_partitioning partitioning;
3155         struct intel_pipe_wm pipe_wm = {};
3156         struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
3157         struct intel_wm_config config = {};
3158
3159         ilk_compute_wm_parameters(crtc, &params);
3160
3161         intel_compute_pipe_wm(crtc, &params, &pipe_wm);
3162
3163         if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
3164                 return;
3165
3166         intel_crtc->wm.active = pipe_wm;
3167
3168         ilk_compute_wm_config(dev, &config);
3169
3170         ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
3171         ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
3172
3173         /* 5/6 split only in single pipe config on IVB+ */
3174         if (INTEL_INFO(dev)->gen >= 7 &&
3175             config.num_pipes_active == 1 && config.sprites_enabled) {
3176                 ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
3177                 ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
3178
3179                 best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
3180         } else {
3181                 best_lp_wm = &lp_wm_1_2;
3182         }
3183
3184         partitioning = (best_lp_wm == &lp_wm_1_2) ?
3185                        INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
3186
3187         ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
3188
3189         ilk_write_wm_values(dev_priv, &results);
3190 }
3191
3192 static void
3193 ilk_update_sprite_wm(struct drm_plane *plane,
3194                      struct drm_crtc *crtc,
3195                      uint32_t sprite_width, uint32_t sprite_height,
3196                      int pixel_size, bool enabled, bool scaled)
3197 {
3198         struct drm_device *dev = plane->dev;
3199         struct intel_plane *intel_plane = to_intel_plane(plane);
3200
3201         intel_plane->wm.enabled = enabled;
3202         intel_plane->wm.scaled = scaled;
3203         intel_plane->wm.horiz_pixels = sprite_width;
3204         intel_plane->wm.vert_pixels = sprite_height;
3205         intel_plane->wm.bytes_per_pixel = pixel_size;
3206
3207         /*
3208          * IVB workaround: must disable low power watermarks for at least
3209          * one frame before enabling scaling.  LP watermarks can be re-enabled
3210          * when scaling is disabled.
3211          *
3212          * WaCxSRDisabledForSpriteScaling:ivb
3213          */
3214         if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
3215                 intel_wait_for_vblank(dev, intel_plane->pipe);
3216
3217         ilk_update_wm(crtc);
3218 }
3219
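/*
 * Decode a watermark register value read back from the hardware into the
 * software skl_pipe_wm state, for either a regular level or the transition
 * watermark, and for either a plane or the cursor.
 */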
3220 static void skl_pipe_wm_active_state(uint32_t val,
3221                                      struct skl_pipe_wm *active,
3222                                      bool is_transwm,
3223                                      bool is_cursor,
3224                                      int i,
3225                                      int level)
3226 {
3227         bool is_enabled = (val & PLANE_WM_EN) != 0;
3228
3229         if (!is_transwm) {
3230                 if (!is_cursor) {
3231                         active->wm[level].plane_en[i] = is_enabled;
3232                         active->wm[level].plane_res_b[i] =
3233                                         val & PLANE_WM_BLOCKS_MASK;
3234                         active->wm[level].plane_res_l[i] =
3235                                         (val >> PLANE_WM_LINES_SHIFT) &
3236                                                 PLANE_WM_LINES_MASK;
3237                 } else {
3238                         active->wm[level].cursor_en = is_enabled;
3239                         active->wm[level].cursor_res_b =
3240                                         val & PLANE_WM_BLOCKS_MASK;
3241                         active->wm[level].cursor_res_l =
3242                                         (val >> PLANE_WM_LINES_SHIFT) &
3243                                                 PLANE_WM_LINES_MASK;
3244                 }
3245         } else {
3246                 if (!is_cursor) {
3247                         active->trans_wm.plane_en[i] = is_enabled;
3248                         active->trans_wm.plane_res_b[i] =
3249                                         val & PLANE_WM_BLOCKS_MASK;
3250                         active->trans_wm.plane_res_l[i] =
3251                                         (val >> PLANE_WM_LINES_SHIFT) &
3252                                                 PLANE_WM_LINES_MASK;
3253                 } else {
3254                         active->trans_wm.cursor_en = is_enabled;
3255                         active->trans_wm.cursor_res_b =
3256                                         val & PLANE_WM_BLOCKS_MASK;
3257                         active->trans_wm.cursor_res_l =
3258                                         (val >> PLANE_WM_LINES_SHIFT) &
3259                                                 PLANE_WM_LINES_MASK;
3260                 }
3261         }
3262 }
3263
3264 static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3265 {
3266         struct drm_device *dev = crtc->dev;
3267         struct drm_i915_private *dev_priv = dev->dev_private;
3268         struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
3269         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3270         struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
3271         enum pipe pipe = intel_crtc->pipe;
3272         int level, i, max_level;
3273         uint32_t temp;
3274
3275         max_level = ilk_wm_max_level(dev);
3276
3277         hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3278
3279         for (level = 0; level <= max_level; level++) {
3280                 for (i = 0; i < intel_num_planes(intel_crtc); i++)
3281                         hw->plane[pipe][i][level] =
3282                                         I915_READ(PLANE_WM(pipe, i, level));
3283                 hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level));
3284         }
3285
3286         for (i = 0; i < intel_num_planes(intel_crtc); i++)
3287                 hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
3288         hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe));
3289
3290         if (!intel_crtc_active(crtc))
3291                 return;
3292
3293         hw->dirty[pipe] = true;
3294
3295         active->linetime = hw->wm_linetime[pipe];
3296
3297         for (level = 0; level <= max_level; level++) {
3298                 for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3299                         temp = hw->plane[pipe][i][level];
3300                         skl_pipe_wm_active_state(temp, active, false,
3301                                                 false, i, level);
3302                 }
3303                 temp = hw->cursor[pipe][level];
3304                 skl_pipe_wm_active_state(temp, active, false, true, i, level);
3305         }
3306
3307         for (i = 0; i < intel_num_planes(intel_crtc); i++) {
3308                 temp = hw->plane_trans[pipe][i];
3309                 skl_pipe_wm_active_state(temp, active, true, false, i, 0);
3310         }
3311
3312         temp = hw->cursor_trans[pipe];
3313         skl_pipe_wm_active_state(temp, active, true, true, i, 0);
3314 }
3315
3316 void skl_wm_get_hw_state(struct drm_device *dev)
3317 {
3318         struct drm_i915_private *dev_priv = dev->dev_private;
3319         struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
3320         struct drm_crtc *crtc;
3321
3322         skl_ddb_get_hw_state(dev_priv, ddb);
3323         list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
3324                 skl_pipe_wm_get_hw_state(crtc);
3325 }
3326
3327 static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
3328 {
3329         struct drm_device *dev = crtc->dev;
3330         struct drm_i915_private *dev_priv = dev->dev_private;
3331         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3332         struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
3333         struct intel_pipe_wm *active = &intel_crtc->wm.active;
3334         enum pipe pipe = intel_crtc->pipe;
3335         static const unsigned int wm0_pipe_reg[] = {
3336                 [PIPE_A] = WM0_PIPEA_ILK,
3337                 [PIPE_B] = WM0_PIPEB_ILK,
3338                 [PIPE_C] = WM0_PIPEC_IVB,
3339         };
3340
3341         hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
3342         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3343                 hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
3344
3345         active->pipe_enabled = intel_crtc_active(crtc);
3346
3347         if (active->pipe_enabled) {
3348                 u32 tmp = hw->wm_pipe[pipe];
3349
3350                 /*
3351                  * For active pipes LP0 watermark is marked as
3352          * enabled, and LP1+ watermarks as disabled since
3353                  * we can't really reverse compute them in case
3354                  * multiple pipes are active.
3355                  */
3356                 active->wm[0].enable = true;
3357                 active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
3358                 active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
3359                 active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
3360                 active->linetime = hw->wm_linetime[pipe];
3361         } else {
3362                 int level, max_level = ilk_wm_max_level(dev);
3363
3364                 /*
3365                  * For inactive pipes, all watermark levels
3366                  * should be marked as enabled but zeroed,
3367                  * which is what we'd compute them to.
3368                  */
3369                 for (level = 0; level <= max_level; level++)
3370                         active->wm[level].enable = true;
3371         }
3372 }
3373
3374 void ilk_wm_get_hw_state(struct drm_device *dev)
3375 {
3376         struct drm_i915_private *dev_priv = dev->dev_private;
3377         struct ilk_wm_values *hw = &dev_priv->wm.hw;
3378         struct drm_crtc *crtc;
3379
3380         for_each_crtc(dev, crtc)
3381                 ilk_pipe_wm_get_hw_state(crtc);
3382
3383         hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
3384         hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
3385         hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
3386
3387         hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
3388         if (INTEL_INFO(dev)->gen >= 7) {
3389                 hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
3390                 hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
3391         }
3392
3393         if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3394                 hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
3395                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
3396         else if (IS_IVYBRIDGE(dev))
3397                 hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
3398                         INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
3399
3400         hw->enable_fbc_wm =
3401                 !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
3402 }
3403
3404 /**
3405  * intel_update_watermarks - update FIFO watermark values based on current modes
3406  *
3407  * Calculate watermark values for the various WM regs based on current mode
3408  * and plane configuration.
3409  *
3410  * There are several cases to deal with here:
3411  *   - normal (i.e. non-self-refresh)
3412  *   - self-refresh (SR) mode
3413  *   - lines are large relative to FIFO size (buffer can hold up to 2)
3414  *   - lines are small relative to FIFO size (buffer can hold more than 2
3415  *     lines), so need to account for TLB latency
3416  *
3417  *   The normal calculation is:
3418  *     watermark = dotclock * bytes per pixel * latency
3419  *   where latency is platform & configuration dependent (we assume pessimal
3420  *   values here).
3421  *
3422  *   The SR calculation is:
3423  *     watermark = (trunc(latency/line time)+1) * surface width *
3424  *       bytes per pixel
3425  *   where
3426  *     line time = htotal / dotclock
3427  *     surface width = hdisplay for normal plane and 64 for cursor
3428  *   and latency is assumed to be high, as above.
3429  *
3430  * The final value programmed to the register should always be rounded up,
3431  * and include an extra 2 entries to account for clock crossings.
3432  *
3433  * We don't use the sprite, so we can ignore that.  And on Crestline we have
3434  * to set the non-SR watermarks to 8.
3435  */
3436 void intel_update_watermarks(struct drm_crtc *crtc)
3437 {
3438         struct drm_i915_private *dev_priv = crtc->dev->dev_private;
3439
3440         if (dev_priv->display.update_wm)
3441                 dev_priv->display.update_wm(crtc);
3442 }
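
/*
 * Illustrative example for the "normal" formula documented above
 * intel_update_watermarks(), with purely assumed numbers: a 148.5 MHz
 * dotclock, 4 bytes per pixel and a 10 us latency give
 * 148.5 MHz * 4 B * 10 us ~= 5940 bytes consumed during the latency window;
 * the programmed value is then rounded up and padded by the extra 2 entries
 * for clock crossings, as noted in the comment above.
 */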
3443
3444 void intel_update_sprite_watermarks(struct drm_plane *plane,
3445                                     struct drm_crtc *crtc,
3446                                     uint32_t sprite_width,
3447                                     uint32_t sprite_height,
3448                                     int pixel_size,
3449                                     bool enabled, bool scaled)
3450 {
3451         struct drm_i915_private *dev_priv = plane->dev->dev_private;
3452
3453         if (dev_priv->display.update_sprite_wm)
3454                 dev_priv->display.update_sprite_wm(plane, crtc,
3455                                                    sprite_width, sprite_height,
3456                                                    pixel_size, enabled, scaled);
3457 }
3458
3459 static struct drm_i915_gem_object *
3460 intel_alloc_context_page(struct drm_device *dev)
3461 {
3462         struct drm_i915_gem_object *ctx;
3463         int ret;
3464
3465         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
3466
3467         ctx = i915_gem_alloc_object(dev, 4096);
3468         if (!ctx) {
3469                 DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
3470                 return NULL;
3471         }
3472
3473         ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
3474         if (ret) {
3475                 DRM_ERROR("failed to pin power context: %d\n", ret);
3476                 goto err_unref;
3477         }
3478
3479         ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
3480         if (ret) {
3481                 DRM_ERROR("failed to set-domain on power context: %d\n", ret);
3482                 goto err_unpin;
3483         }
3484
3485         return ctx;
3486
3487 err_unpin:
3488         i915_gem_object_ggtt_unpin(ctx);
3489 err_unref:
3490         drm_gem_object_unreference(&ctx->base);
3491         return NULL;
3492 }
3493
3494 /**
3495  * Lock protecting IPS related data structures
3496  */
3497 DEFINE_SPINLOCK(mchdev_lock);
3498
3499 /* Global for IPS driver to get at the current i915 device. Protected by
3500  * mchdev_lock. */
3501 static struct drm_i915_private *i915_mch_dev;
3502
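/*
 * Issue a DRPS frequency-change command via MEMSWCTL for the requested
 * frequency/delay value.  Returns false if the hardware is still busy with a
 * previous command (MEMCTL_CMD_STS set), true once the new request has been
 * issued.
 */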
3503 bool ironlake_set_drps(struct drm_device *dev, u8 val)
3504 {
3505         struct drm_i915_private *dev_priv = dev->dev_private;
3506         u16 rgvswctl;
3507
3508         assert_spin_locked(&mchdev_lock);
3509
3510         rgvswctl = I915_READ16(MEMSWCTL);
3511         if (rgvswctl & MEMCTL_CMD_STS) {
3512                 DRM_DEBUG("gpu busy, RCS change rejected\n");
3513                 return false; /* still busy with another command */
3514         }
3515
3516         rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
3517                 (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
3518         I915_WRITE16(MEMSWCTL, rgvswctl);
3519         POSTING_READ16(MEMSWCTL);
3520
3521         rgvswctl |= MEMCTL_CMD_STS;
3522         I915_WRITE16(MEMSWCTL, rgvswctl);
3523
3524         return true;
3525 }
3526
3527 static void ironlake_enable_drps(struct drm_device *dev)
3528 {
3529         struct drm_i915_private *dev_priv = dev->dev_private;
3530         u32 rgvmodectl = I915_READ(MEMMODECTL);
3531         u8 fmax, fmin, fstart, vstart;
3532
3533         spin_lock_irq(&mchdev_lock);
3534
3535         /* Enable temp reporting */
3536         I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
3537         I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
3538
3539         /* 100ms RC evaluation intervals */
3540         I915_WRITE(RCUPEI, 100000);
3541         I915_WRITE(RCDNEI, 100000);
3542
3543         /* Set max/min thresholds to 90ms and 80ms respectively */
3544         I915_WRITE(RCBMAXAVG, 90000);
3545         I915_WRITE(RCBMINAVG, 80000);
3546
3547         I915_WRITE(MEMIHYST, 1);
3548
3549         /* Set up min, max, and cur for interrupt handling */
3550         fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
3551         fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
3552         fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
3553                 MEMMODE_FSTART_SHIFT;
3554
3555         vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
3556                 PXVFREQ_PX_SHIFT;
3557
3558         dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
3559         dev_priv->ips.fstart = fstart;
3560
3561         dev_priv->ips.max_delay = fstart;
3562         dev_priv->ips.min_delay = fmin;
3563         dev_priv->ips.cur_delay = fstart;
3564
3565         DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
3566                          fmax, fmin, fstart);
3567
3568         I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
3569
3570         /*
3571          * Interrupts will be enabled in ironlake_irq_postinstall
3572          */
3573
3574         I915_WRITE(VIDSTART, vstart);
3575         POSTING_READ(VIDSTART);
3576
3577         rgvmodectl |= MEMMODE_SWMODE_EN;
3578         I915_WRITE(MEMMODECTL, rgvmodectl);
3579
3580         if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
3581                 DRM_ERROR("stuck trying to change perf mode\n");
3582         mdelay(1);
3583
3584         ironlake_set_drps(dev, fstart);
3585
3586         dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
3587                 I915_READ(0x112e0);
3588         dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
3589         dev_priv->ips.last_count2 = I915_READ(0x112f4);
3590         dev_priv->ips.last_time2 = ktime_get_raw_ns();
3591
3592         spin_unlock_irq(&mchdev_lock);
3593 }
3594
3595 static void ironlake_disable_drps(struct drm_device *dev)
3596 {
3597         struct drm_i915_private *dev_priv = dev->dev_private;
3598         u16 rgvswctl;
3599
3600         spin_lock_irq(&mchdev_lock);
3601
3602         rgvswctl = I915_READ16(MEMSWCTL);
3603
3604         /* Ack interrupts, disable EFC interrupt */
3605         I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
3606         I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
3607         I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
3608         I915_WRITE(DEIIR, DE_PCU_EVENT);
3609         I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
3610
3611         /* Go back to the starting frequency */
3612         ironlake_set_drps(dev, dev_priv->ips.fstart);
3613         mdelay(1);
3614         rgvswctl |= MEMCTL_CMD_STS;
3615         I915_WRITE(MEMSWCTL, rgvswctl);
3616         mdelay(1);
3617
3618         spin_unlock_irq(&mchdev_lock);
3619 }
3620
3621 /* There's a funny hw issue where the hw returns all 0 when reading from
3622  * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
3623  * ourselves, instead of doing an RMW cycle (which might result in us clearing
3624  * all limits and the GPU getting stuck at whatever frequency it currently runs at).
3625  */
3626 static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)
3627 {
3628         u32 limits;
3629
3630         /* Only set the down limit when we've reached the lowest level to avoid
3631          * getting more interrupts, otherwise leave this clear. This prevents a
3632          * race in the hw when coming out of rc6: There's a tiny window where
3633          * the hw runs at the minimal clock before selecting the desired
3634          * frequency, if the down threshold expires in that window we will not
3635          * receive a down interrupt. */
3636         limits = dev_priv->rps.max_freq_softlimit << 24;
3637         if (val <= dev_priv->rps.min_freq_softlimit)
3638                 limits |= dev_priv->rps.min_freq_softlimit << 16;
3639
3640         return limits;
3641 }
3642
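/*
 * Program the RP up/down evaluation intervals and busy thresholds according
 * to the power band (LOW_POWER / BETWEEN / HIGH_POWER) the requested
 * frequency falls into, giving each band its own hysteresis.  The interval
 * values are in units of 1280ns, as noted below.
 */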
3643 static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
3644 {
3645         int new_power;
3646
3647         new_power = dev_priv->rps.power;
3648         switch (dev_priv->rps.power) {
3649         case LOW_POWER:
3650                 if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
3651                         new_power = BETWEEN;
3652                 break;
3653
3654         case BETWEEN:
3655                 if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
3656                         new_power = LOW_POWER;
3657                 else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
3658                         new_power = HIGH_POWER;
3659                 break;
3660
3661         case HIGH_POWER:
3662                 if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
3663                         new_power = BETWEEN;
3664                 break;
3665         }
3666         /* Max/min bins are special */
3667         if (val == dev_priv->rps.min_freq_softlimit)
3668                 new_power = LOW_POWER;
3669         if (val == dev_priv->rps.max_freq_softlimit)
3670                 new_power = HIGH_POWER;
3671         if (new_power == dev_priv->rps.power)
3672                 return;
3673
3674         /* Note the units here are not exactly 1us, but 1280ns. */
3675         switch (new_power) {
3676         case LOW_POWER:
3677                 /* Upclock if more than 95% busy over 16ms */
3678                 I915_WRITE(GEN6_RP_UP_EI, 12500);
3679                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
3680
3681                 /* Downclock if less than 85% busy over 32ms */
3682                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3683                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
3684
3685                 I915_WRITE(GEN6_RP_CONTROL,
3686                            GEN6_RP_MEDIA_TURBO |
3687                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3688                            GEN6_RP_MEDIA_IS_GFX |
3689                            GEN6_RP_ENABLE |
3690                            GEN6_RP_UP_BUSY_AVG |
3691                            GEN6_RP_DOWN_IDLE_AVG);
3692                 break;
3693
3694         case BETWEEN:
3695                 /* Upclock if more than 90% busy over 13ms */
3696                 I915_WRITE(GEN6_RP_UP_EI, 10250);
3697                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
3698
3699                 /* Downclock if less than 75% busy over 32ms */
3700                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3701                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
3702
3703                 I915_WRITE(GEN6_RP_CONTROL,
3704                            GEN6_RP_MEDIA_TURBO |
3705                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3706                            GEN6_RP_MEDIA_IS_GFX |
3707                            GEN6_RP_ENABLE |
3708                            GEN6_RP_UP_BUSY_AVG |
3709                            GEN6_RP_DOWN_IDLE_AVG);
3710                 break;
3711
3712         case HIGH_POWER:
3713                 /* Upclock if more than 85% busy over 10ms */
3714                 I915_WRITE(GEN6_RP_UP_EI, 8000);
3715                 I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
3716
3717                 /* Downclock if less than 60% busy over 32ms */
3718                 I915_WRITE(GEN6_RP_DOWN_EI, 25000);
3719                 I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
3720
3721                 I915_WRITE(GEN6_RP_CONTROL,
3722                            GEN6_RP_MEDIA_TURBO |
3723                            GEN6_RP_MEDIA_HW_NORMAL_MODE |
3724                            GEN6_RP_MEDIA_IS_GFX |
3725                            GEN6_RP_ENABLE |
3726                            GEN6_RP_UP_BUSY_AVG |
3727                            GEN6_RP_DOWN_IDLE_AVG);
3728                 break;
3729         }
3730
3731         dev_priv->rps.power = new_power;
3732         dev_priv->rps.last_adj = 0;
3733 }
3734
3735 static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
3736 {
3737         u32 mask = 0;
3738
3739         if (val > dev_priv->rps.min_freq_softlimit)
3740                 mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
3741         if (val < dev_priv->rps.max_freq_softlimit)
3742                 mask |= GEN6_PM_RP_UP_THRESHOLD;
3743
3744         mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
3745         mask &= dev_priv->pm_rps_events;
3746
3747         return gen6_sanitize_rps_pm_mask(dev_priv, ~mask);
3748 }
3749
3750 /* gen6_set_rps is called to update the frequency request, but should also be
3751  * called when the range (min_delay and max_delay) is modified so that we can
3752  * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
3753 void gen6_set_rps(struct drm_device *dev, u8 val)
3754 {
3755         struct drm_i915_private *dev_priv = dev->dev_private;
3756
3757         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3758         WARN_ON(val > dev_priv->rps.max_freq_softlimit);
3759         WARN_ON(val < dev_priv->rps.min_freq_softlimit);
3760
3761         /* min/max delay may still have been modified so be sure to
3762          * write the limits value.
3763          */
3764         if (val != dev_priv->rps.cur_freq) {
3765                 gen6_set_rps_thresholds(dev_priv, val);
3766
3767                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
3768                         I915_WRITE(GEN6_RPNSWREQ,
3769                                    HSW_FREQUENCY(val));
3770                 else
3771                         I915_WRITE(GEN6_RPNSWREQ,
3772                                    GEN6_FREQUENCY(val) |
3773                                    GEN6_OFFSET(0) |
3774                                    GEN6_AGGRESSIVE_TURBO);
3775         }
3776
3777         /* Make sure we continue to get interrupts
3778          * until we hit the minimum or maximum frequencies.
3779          */
3780         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val));
3781         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
3782
3783         POSTING_READ(GEN6_RPNSWREQ);
3784
3785         dev_priv->rps.cur_freq = val;
3786         trace_intel_gpu_freq_change(val * 50);
3787 }
3788
3789 /* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
3790  *
3791  * If Gfx is idle, then:
3792  * 1. Mask Turbo interrupts
3793  * 2. Bring up the Gfx clock
3794  * 3. Change the freq to Rpn and wait until the P-Unit updates the freq
3795  * 4. Clear the Force GFX CLK ON bit so that the Gfx clock can go down again
3796  * 5. Unmask Turbo interrupts
3797  */
3798 static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
3799 {
3800         struct drm_device *dev = dev_priv->dev;
3801
3802         /* CHV and later VLV revisions don't need to force the gfx clock */
3803         if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
3804                 valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
3805                 return;
3806         }
3807
3808         /*
3809          * When we are idle, drop to the minimum voltage state.
3810          */
3811
3812         if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
3813                 return;
3814
3815         /* Mask turbo interrupts so that they do not come in between */
3816         I915_WRITE(GEN6_PMINTRMSK,
3817                    gen6_sanitize_rps_pm_mask(dev_priv, ~0));
3818
3819         vlv_force_gfx_clock(dev_priv, true);
3820
3821         dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
3822
3823         vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
3824                                         dev_priv->rps.min_freq_softlimit);
3825
3826         if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
3827                                 & GENFREQSTATUS) == 0, 100))
3828                 DRM_ERROR("timed out waiting for Punit\n");
3829
3830         vlv_force_gfx_clock(dev_priv, false);
3831
3832         I915_WRITE(GEN6_PMINTRMSK,
3833                    gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
3834 }
3835
3836 void gen6_rps_idle(struct drm_i915_private *dev_priv)
3837 {
3838         struct drm_device *dev = dev_priv->dev;
3839
3840         mutex_lock(&dev_priv->rps.hw_lock);
3841         if (dev_priv->rps.enabled) {
3842                 if (IS_VALLEYVIEW(dev))
3843                         vlv_set_rps_idle(dev_priv);
3844                 else
3845                         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
3846                 dev_priv->rps.last_adj = 0;
3847         }
3848         mutex_unlock(&dev_priv->rps.hw_lock);
3849 }
3850
3851 void gen6_rps_boost(struct drm_i915_private *dev_priv)
3852 {
3853         struct drm_device *dev = dev_priv->dev;
3854
3855         mutex_lock(&dev_priv->rps.hw_lock);
3856         if (dev_priv->rps.enabled) {
3857                 if (IS_VALLEYVIEW(dev))
3858                         valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
3859                 else
3860                         gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
3861                 dev_priv->rps.last_adj = 0;
3862         }
3863         mutex_unlock(&dev_priv->rps.hw_lock);
3864 }
3865
3866 void valleyview_set_rps(struct drm_device *dev, u8 val)
3867 {
3868         struct drm_i915_private *dev_priv = dev->dev_private;
3869
3870         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
3871         WARN_ON(val > dev_priv->rps.max_freq_softlimit);
3872         WARN_ON(val < dev_priv->rps.min_freq_softlimit);
3873
3874         if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
3875                       "Odd GPU freq value\n"))
3876                 val &= ~1;
3877
3878         if (val != dev_priv->rps.cur_freq)
3879                 vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
3880
3881         I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
3882
3883         dev_priv->rps.cur_freq = val;
3884         trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
3885 }
3886
3887 static void gen9_disable_rps(struct drm_device *dev)
3888 {
3889         struct drm_i915_private *dev_priv = dev->dev_private;
3890
3891         I915_WRITE(GEN6_RC_CONTROL, 0);
3892         I915_WRITE(GEN9_PG_ENABLE, 0);
3893 }
3894
3895 static void gen6_disable_rps(struct drm_device *dev)
3896 {
3897         struct drm_i915_private *dev_priv = dev->dev_private;
3898
3899         I915_WRITE(GEN6_RC_CONTROL, 0);
3900         I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
3901 }
3902
3903 static void cherryview_disable_rps(struct drm_device *dev)
3904 {
3905         struct drm_i915_private *dev_priv = dev->dev_private;
3906
3907         I915_WRITE(GEN6_RC_CONTROL, 0);
3908 }
3909
3910 static void valleyview_disable_rps(struct drm_device *dev)
3911 {
3912         struct drm_i915_private *dev_priv = dev->dev_private;
3913
3914         /* We do forcewake before disabling RC6,
3915          * which is what the BIOS expects when going into suspend */
3916         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
3917
3918         I915_WRITE(GEN6_RC_CONTROL, 0);
3919
3920         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
3921 }
3922
3923 static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
3924 {
3925         if (IS_VALLEYVIEW(dev)) {
3926                 if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
3927                         mode = GEN6_RC_CTL_RC6_ENABLE;
3928                 else
3929                         mode = 0;
3930         }
3931         if (HAS_RC6p(dev))
3932                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
3933                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
3934                               (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
3935                               (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
3936
3937         else
3938                 DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
3939                               (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
3940 }
3941
3942 static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
3943 {
3944         /* No RC6 before Ironlake */
3945         if (INTEL_INFO(dev)->gen < 5)
3946                 return 0;
3947
3948         /* RC6 is only available on Ironlake mobile, not on desktop */
3949         if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev))
3950                 return 0;
3951
3952         /* Respect the kernel parameter if it is set */
3953         if (enable_rc6 >= 0) {
3954                 int mask;
3955
3956                 if (HAS_RC6p(dev))
3957                         mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
3958                                INTEL_RC6pp_ENABLE;
3959                 else
3960                         mask = INTEL_RC6_ENABLE;
3961
3962                 if ((enable_rc6 & mask) != enable_rc6)
3963                         DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
3964                                       enable_rc6 & mask, enable_rc6, mask);
3965
3966                 return enable_rc6 & mask;
3967         }
3968
3969         /* Disable RC6 on Ironlake */
3970         if (INTEL_INFO(dev)->gen == 5)
3971                 return 0;
3972
3973         if (IS_IVYBRIDGE(dev))
3974                 return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
3975
3976         return INTEL_RC6_ENABLE;
3977 }
3978
3979 int intel_enable_rc6(const struct drm_device *dev)
3980 {
3981         return i915.enable_rc6;
3982 }
3983
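/*
 * Read the RPS frequency limits from RP_STATE_CAP (in units of 50MHz on these
 * platforms): RP0 (max), RP1 (efficient) and RPn (min).  On HSW/BDW the
 * efficient frequency is refined via a pcode read, and any previously set
 * soft limits are preserved across re-init.
 */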
3984 static void gen6_init_rps_frequencies(struct drm_device *dev)
3985 {
3986         struct drm_i915_private *dev_priv = dev->dev_private;
3987         uint32_t rp_state_cap;
3988         u32 ddcc_status = 0;
3989         int ret;
3990
3991         rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
3992         /* All of these values are in units of 50MHz */
3993         dev_priv->rps.cur_freq          = 0;
3994         /* static values from HW: RP0 > RP1 > RPn (min_freq) */
3995         dev_priv->rps.rp0_freq          = (rp_state_cap >>  0) & 0xff;
3996         dev_priv->rps.rp1_freq          = (rp_state_cap >>  8) & 0xff;
3997         dev_priv->rps.min_freq          = (rp_state_cap >> 16) & 0xff;
3998         /* hw_max = RP0 until we check for overclocking */
3999         dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
4000
4001         dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
4002         if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
4003                 ret = sandybridge_pcode_read(dev_priv,
4004                                         HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
4005                                         &ddcc_status);
4006                 if (ret == 0)
4007                         dev_priv->rps.efficient_freq =
4008                                 clamp_t(u8,
4009                                         ((ddcc_status >> 8) & 0xff),
4010                                         dev_priv->rps.min_freq,
4011                                         dev_priv->rps.max_freq);
4012         }
4013
4014         /* Preserve min/max settings in case of re-init */
4015         if (dev_priv->rps.max_freq_softlimit == 0)
4016                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4017
4018         if (dev_priv->rps.min_freq_softlimit == 0) {
4019                 if (IS_HASWELL(dev) || IS_BROADWELL(dev))
4020                         dev_priv->rps.min_freq_softlimit =
4021                                 /* max(RPe, 450 MHz) */
4022                                 max(dev_priv->rps.efficient_freq, (u8) 9);
4023                 else
4024                         dev_priv->rps.min_freq_softlimit =
4025                                 dev_priv->rps.min_freq;
4026         }
4027 }
4028
4029 /* See the Gen9_GT_PM_Programming_Guide doc for the below */
4030 static void gen9_enable_rps(struct drm_device *dev)
4031 {
4032         struct drm_i915_private *dev_priv = dev->dev_private;
4033
4034         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4035
4036         gen6_init_rps_frequencies(dev);
4037
4038         I915_WRITE(GEN6_RPNSWREQ, 0xc800000);
4039         I915_WRITE(GEN6_RC_VIDEO_FREQ, 0xc800000);
4040
4041         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
4042         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, 0x12060000);
4043         I915_WRITE(GEN6_RP_UP_THRESHOLD, 0xe808);
4044         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 0x3bd08);
4045         I915_WRITE(GEN6_RP_UP_EI, 0x101d0);
4046         I915_WRITE(GEN6_RP_DOWN_EI, 0x55730);
4047         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 0xa);
4048         I915_WRITE(GEN6_PMINTRMSK, 0x6);
4049         I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO |
4050                    GEN6_RP_MEDIA_HW_MODE | GEN6_RP_MEDIA_IS_GFX |
4051                    GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG |
4052                    GEN6_RP_DOWN_IDLE_AVG);
4053
4054         gen6_enable_rps_interrupts(dev);
4055
4056         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4057 }
4058
4059 static void gen9_enable_rc6(struct drm_device *dev)
4060 {
4061         struct drm_i915_private *dev_priv = dev->dev_private;
4062         struct intel_engine_cs *ring;
4063         uint32_t rc6_mask = 0;
4064         int unused;
4065
4066         /* 1a: Software RC state - RC0 */
4067         I915_WRITE(GEN6_RC_STATE, 0);
4068
4069         /* 1b: Get forcewake during program sequence. Although the driver
4070          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4071         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4072
4073         /* 2a: Disable RC states. */
4074         I915_WRITE(GEN6_RC_CONTROL, 0);
4075
4076         /* 2b: Program RC6 thresholds.*/
4077         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
4078         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4079         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4080         for_each_ring(ring, dev_priv, unused)
4081                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4082         I915_WRITE(GEN6_RC_SLEEP, 0);
4083         I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
4084
4085         /* 2c: Program Coarse Power Gating Policies. */
4086         I915_WRITE(GEN9_MEDIA_PG_IDLE_HYSTERESIS, 25);
4087         I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
4088
4089         /* 3a: Enable RC6 */
4090         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4091                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4092         DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
4093                         "on" : "off");
4094         I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4095                                    GEN6_RC_CTL_EI_MODE(1) |
4096                                    rc6_mask);
4097
4098         /* 3b: Enable Coarse Power Gating only when RC6 is enabled */
4099         I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? 3 : 0);
4100
4101         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4102
4103 }
4104
4105 static void gen8_enable_rps(struct drm_device *dev)
4106 {
4107         struct drm_i915_private *dev_priv = dev->dev_private;
4108         struct intel_engine_cs *ring;
4109         uint32_t rc6_mask = 0;
4110         int unused;
4111
4112         /* 1a: Software RC state - RC0 */
4113         I915_WRITE(GEN6_RC_STATE, 0);
4114
4115         /* 1c & 1d: Get forcewake during program sequence. Although the driver
4116          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4117         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4118
4119         /* 2a: Disable RC states. */
4120         I915_WRITE(GEN6_RC_CONTROL, 0);
4121
4122         /* Initialize rps frequencies */
4123         gen6_init_rps_frequencies(dev);
4124
4125         /* 2b: Program RC6 thresholds.*/
4126         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4127         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4128         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4129         for_each_ring(ring, dev_priv, unused)
4130                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4131         I915_WRITE(GEN6_RC_SLEEP, 0);
4132         if (IS_BROADWELL(dev))
4133                 I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
4134         else
4135                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
4136
4137         /* 3: Enable RC6 */
4138         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4139                 rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
4140         intel_print_rc6_info(dev, rc6_mask);
4141         if (IS_BROADWELL(dev))
4142                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4143                                 GEN7_RC_CTL_TO_MODE |
4144                                 rc6_mask);
4145         else
4146                 I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
4147                                 GEN6_RC_CTL_EI_MODE(1) |
4148                                 rc6_mask);
4149
4150         /* 4: Program defaults and thresholds for RPS */
4151         I915_WRITE(GEN6_RPNSWREQ,
4152                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4153         I915_WRITE(GEN6_RC_VIDEO_FREQ,
4154                    HSW_FREQUENCY(dev_priv->rps.rp1_freq));
4155         /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
4156         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
4157
4158         /* Docs recommend 900MHz, and 300 MHz respectively */
4159         I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
4160                    dev_priv->rps.max_freq_softlimit << 24 |
4161                    dev_priv->rps.min_freq_softlimit << 16);
4162
4163         I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
4164         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
4165         I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
4166         I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
4167
4168         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4169
4170         /* 5: Enable RPS */
4171         I915_WRITE(GEN6_RP_CONTROL,
4172                    GEN6_RP_MEDIA_TURBO |
4173                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4174                    GEN6_RP_MEDIA_IS_GFX |
4175                    GEN6_RP_ENABLE |
4176                    GEN6_RP_UP_BUSY_AVG |
4177                    GEN6_RP_DOWN_IDLE_AVG);
4178
4179         /* 6: Ring frequency + overclocking (our driver does this later) */
4180
4181         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4182         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4183
4184         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4185 }
4186
4187 static void gen6_enable_rps(struct drm_device *dev)
4188 {
4189         struct drm_i915_private *dev_priv = dev->dev_private;
4190         struct intel_engine_cs *ring;
4191         u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
4192         u32 gtfifodbg;
4193         int rc6_mode;
4194         int i, ret;
4195
4196         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4197
4198         /* Here begins a magic sequence of register writes to enable
4199          * auto-downclocking.
4200          *
4201          * Perhaps there might be some value in exposing these to
4202          * userspace...
4203          */
4204         I915_WRITE(GEN6_RC_STATE, 0);
4205
4206         /* Clear the DBG now so we don't confuse earlier errors */
4207         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4208                 DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
4209                 I915_WRITE(GTFIFODBG, gtfifodbg);
4210         }
4211
4212         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4213
4214         /* Initialize rps frequencies */
4215         gen6_init_rps_frequencies(dev);
4216
4217         /* disable the counters and set deterministic thresholds */
4218         I915_WRITE(GEN6_RC_CONTROL, 0);
4219
4220         I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
4221         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
4222         I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
4223         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4224         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4225
4226         for_each_ring(ring, dev_priv, i)
4227                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4228
4229         I915_WRITE(GEN6_RC_SLEEP, 0);
4230         I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
4231         if (IS_IVYBRIDGE(dev))
4232                 I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
4233         else
4234                 I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
4235         I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
4236         I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
4237
4238         /* Check if we are enabling RC6 */
4239         rc6_mode = intel_enable_rc6(dev_priv->dev);
4240         if (rc6_mode & INTEL_RC6_ENABLE)
4241                 rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
4242
4243         /* We don't use those on Haswell */
4244         if (!IS_HASWELL(dev)) {
4245                 if (rc6_mode & INTEL_RC6p_ENABLE)
4246                         rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
4247
4248                 if (rc6_mode & INTEL_RC6pp_ENABLE)
4249                         rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
4250         }
4251
4252         intel_print_rc6_info(dev, rc6_mask);
4253
4254         I915_WRITE(GEN6_RC_CONTROL,
4255                    rc6_mask |
4256                    GEN6_RC_CTL_EI_MODE(1) |
4257                    GEN6_RC_CTL_HW_ENABLE);
4258
4259         /* Power down if completely idle for over 50ms */
4260         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
4261         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4262
4263         ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
4264         if (ret)
4265                 DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
4266
4267         ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
4268         if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
4269                 DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
4270                                  (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
4271                                  (pcu_mbox & 0xff) * 50);
4272                 dev_priv->rps.max_freq = pcu_mbox & 0xff;
4273         }
4274
4275         dev_priv->rps.power = HIGH_POWER; /* force a reset */
4276         gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
4277
4278         rc6vids = 0;
4279         ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
4280         if (IS_GEN6(dev) && ret) {
4281                 DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
4282         } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
4283                 DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
4284                           GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
4285                 rc6vids &= 0xffff00;
4286                 rc6vids |= GEN6_ENCODE_RC6_VID(450);
4287                 ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
4288                 if (ret)
4289                         DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
4290         }
4291
4292         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4293 }
4294
4295 static void __gen6_update_ring_freq(struct drm_device *dev)
4296 {
4297         struct drm_i915_private *dev_priv = dev->dev_private;
4298         int min_freq = 15;
4299         unsigned int gpu_freq;
4300         unsigned int max_ia_freq, min_ring_freq;
4301         int scaling_factor = 180;
4302         struct cpufreq_policy *policy;
4303
4304         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4305
4306         policy = cpufreq_cpu_get(0);
4307         if (policy) {
4308                 max_ia_freq = policy->cpuinfo.max_freq;
4309                 cpufreq_cpu_put(policy);
4310         } else {
4311                 /*
4312                  * Default to measured freq if none found, PCU will ensure we
4313                  * don't go over
4314                  */
4315                 max_ia_freq = tsc_khz;
4316         }
4317
4318         /* Convert from kHz to MHz */
4319         max_ia_freq /= 1000;
4320
4321         min_ring_freq = I915_READ(DCLK) & 0xf;
4322         /* convert DDR frequency from units of 266.6MHz to bandwidth */
4323         min_ring_freq = mult_frac(min_ring_freq, 8, 3);
4324
4325         /*
4326          * For each potential GPU frequency, load a ring frequency we'd like
4327          * to use for memory access.  We do this by specifying the IA frequency
4328          * the PCU should use as a reference to determine the ring frequency.
4329          */
4330         for (gpu_freq = dev_priv->rps.max_freq; gpu_freq >= dev_priv->rps.min_freq;
4331              gpu_freq--) {
4332                 int diff = dev_priv->rps.max_freq - gpu_freq;
4333                 unsigned int ia_freq = 0, ring_freq = 0;
4334
4335                 if (INTEL_INFO(dev)->gen >= 8) {
4336                         /* max(2 * GT, DDR). NB: GT is 50MHz units */
4337                         ring_freq = max(min_ring_freq, gpu_freq);
4338                 } else if (IS_HASWELL(dev)) {
4339                         ring_freq = mult_frac(gpu_freq, 5, 4);
4340                         ring_freq = max(min_ring_freq, ring_freq);
4341                         /* leave ia_freq as the default, chosen by cpufreq */
4342                 } else {
4343                         /* On older processors, there is no separate ring
4344                          * clock domain, so in order to boost the bandwidth
4345                          * of the ring, we need to upclock the CPU (ia_freq).
4346                          *
4347                          * For GPU frequencies less than 750MHz,
4348                          * just use the lowest ring freq.
4349                          */
4350                         if (gpu_freq < min_freq)
4351                                 ia_freq = 800;
4352                         else
4353                                 ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
4354                         ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
4355                 }
4356
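                /*
                 * Pack the table entry: ia_freq ends up in 100 MHz units
                 * (max_ia_freq is in MHz and DIV_ROUND_CLOSEST divides it by
                 * 100 above) and is combined with the ring and GPU
                 * frequencies into a single min-freq-table pcode write.
                 */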
4357                 sandybridge_pcode_write(dev_priv,
4358                                         GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
4359                                         ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
4360                                         ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
4361                                         gpu_freq);
4362         }
4363 }
4364
4365 void gen6_update_ring_freq(struct drm_device *dev)
4366 {
4367         struct drm_i915_private *dev_priv = dev->dev_private;
4368
4369         if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev))
4370                 return;
4371
4372         mutex_lock(&dev_priv->rps.hw_lock);
4373         __gen6_update_ring_freq(dev);
4374         mutex_unlock(&dev_priv->rps.hw_lock);
4375 }
4376
4377 static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
4378 {
4379         struct drm_device *dev = dev_priv->dev;
4380         u32 val, rp0;
4381
4382         if (dev->pdev->revision >= 0x20) {
4383                 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
4384
4385                 switch (INTEL_INFO(dev)->eu_total) {
4386                 case 8:
4387                                 /* (2 * 4) config */
4388                                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT);
4389                                 break;
4390                 case 12:
4391                                 /* (2 * 6) config */
4392                                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT);
4393                                 break;
4394                 case 16:
4395                                 /* (2 * 8) config */
4396                 default:
4397                                 /* Setting (2 * 8) Min RP0 for any other combination */
4398                                 rp0 = (val >> FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT);
4399                                 break;
4400                 }
4401                 rp0 = (rp0 & FB_GFX_FREQ_FUSE_MASK);
4402         } else {
4403                 /* For pre-production hardware */
4404                 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
4405                 rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
4406                        PUNIT_GPU_STATUS_MAX_FREQ_MASK;
4407         }
4408         return rp0;
4409 }
4410
4411 static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
4412 {
4413         u32 val, rpe;
4414
4415         val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
4416         rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
4417
4418         return rpe;
4419 }
4420
4421 static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
4422 {
4423         struct drm_device *dev = dev_priv->dev;
4424         u32 val, rp1;
4425
4426         if (dev->pdev->revision >= 0x20) {
4427                 val = vlv_punit_read(dev_priv, FB_GFX_FMAX_AT_VMAX_FUSE);
4428                 rp1 = (val & FB_GFX_FREQ_FUSE_MASK);
4429         } else {
4430                 /* For pre-production hardware */
4431                 val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4432                 rp1 = ((val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) &
4433                        PUNIT_GPU_STATUS_MAX_FREQ_MASK);
4434         }
4435         return rp1;
4436 }
4437
4438 static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
4439 {
4440         struct drm_device *dev = dev_priv->dev;
4441         u32 val, rpn;
4442
4443         if (dev->pdev->revision >= 0x20) {
4444                 val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
4445                 rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
4446                        FB_GFX_FREQ_FUSE_MASK);
4447         } else { /* For pre-production hardware */
4448                 val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
4449                 rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
4450                        PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
4451         }
4452
4453         return rpn;
4454 }
4455
4456 static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
4457 {
4458         u32 val, rp1;
4459
4460         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
4461
4462         rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
4463
4464         return rp1;
4465 }
4466
4467 static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
4468 {
4469         u32 val, rp0;
4470
4471         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
4472
4473         rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
4474         /* Clamp to max */
4475         rp0 = min_t(u32, rp0, 0xea);
4476
4477         return rp0;
4478 }
4479
4480 static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
4481 {
4482         u32 val, rpe;
4483
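        /*
         * RPe is fused across two registers: the low bits come from
         * FMAX_FUSE_LO and the upper bits from FMAX_FUSE_HI, recombined here.
         */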
4484         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
4485         rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
4486         val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
4487         rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
4488
4489         return rpe;
4490 }
4491
4492 static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
4493 {
4494         return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
4495 }
4496
4497 /* Check that the pctx buffer wasn't moved under us. */
4498 static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
4499 {
4500         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
4501
4502         WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
4503                              dev_priv->vlv_pctx->stolen->start);
4504 }
4505
4506
4507 /* Check that the pcbr address is not empty. */
4508 static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
4509 {
4510         unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
4511
4512         WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
4513 }
4514
4515 static void cherryview_setup_pctx(struct drm_device *dev)
4516 {
4517         struct drm_i915_private *dev_priv = dev->dev_private;
4518         unsigned long pctx_paddr, paddr;
4519         struct i915_gtt *gtt = &dev_priv->gtt;
4520         u32 pcbr;
4521         int pctx_size = 32*1024;
4522
4523         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4524
4525         pcbr = I915_READ(VLV_PCBR);
4526         if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
4527                 DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
4528                 paddr = (dev_priv->mm.stolen_base +
4529                          (gtt->stolen_size - pctx_size));
4530
4531                 pctx_paddr = (paddr & (~4095));
4532                 I915_WRITE(VLV_PCBR, pctx_paddr);
4533         }
4534
4535         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
4536 }
4537
4538 static void valleyview_setup_pctx(struct drm_device *dev)
4539 {
4540         struct drm_i915_private *dev_priv = dev->dev_private;
4541         struct drm_i915_gem_object *pctx;
4542         unsigned long pctx_paddr;
4543         u32 pcbr;
4544         int pctx_size = 24*1024;
4545
4546         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4547
4548         pcbr = I915_READ(VLV_PCBR);
4549         if (pcbr) {
4550                 /* BIOS set it up already, grab the pre-alloc'd space */
4551                 int pcbr_offset;
4552
4553                 pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
4554                 pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
4555                                                                       pcbr_offset,
4556                                                                       I915_GTT_OFFSET_NONE,
4557                                                                       pctx_size);
4558                 goto out;
4559         }
4560
4561         DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
4562
4563         /*
4564          * From the Gunit register HAS:
4565          * The Gfx driver is expected to program this register and ensure
4566          * proper allocation within Gfx stolen memory.  For example, this
4567          * register should be programmed such that the PCBR range does not
4568          * overlap with other ranges, such as the frame buffer, protected
4569          * memory, or any other relevant ranges.
4570          */
4571         pctx = i915_gem_object_create_stolen(dev, pctx_size);
4572         if (!pctx) {
4573                 DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
4574                 return;
4575         }
4576
4577         pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
4578         I915_WRITE(VLV_PCBR, pctx_paddr);
4579
4580 out:
4581         DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
4582         dev_priv->vlv_pctx = pctx;
4583 }
4584
4585 static void valleyview_cleanup_pctx(struct drm_device *dev)
4586 {
4587         struct drm_i915_private *dev_priv = dev->dev_private;
4588
4589         if (WARN_ON(!dev_priv->vlv_pctx))
4590                 return;
4591
4592         drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
4593         dev_priv->vlv_pctx = NULL;
4594 }
4595
4596 static void valleyview_init_gt_powersave(struct drm_device *dev)
4597 {
4598         struct drm_i915_private *dev_priv = dev->dev_private;
4599         u32 val;
4600
4601         valleyview_setup_pctx(dev);
4602
4603         mutex_lock(&dev_priv->rps.hw_lock);
4604
4605         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4606         switch ((val >> 6) & 3) {
4607         case 0:
4608         case 1:
4609                 dev_priv->mem_freq = 800;
4610                 break;
4611         case 2:
4612                 dev_priv->mem_freq = 1066;
4613                 break;
4614         case 3:
4615                 dev_priv->mem_freq = 1333;
4616                 break;
4617         }
4618         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
4619
4620         dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
4621         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
4622         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
4623                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
4624                          dev_priv->rps.max_freq);
4625
4626         dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
4627         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
4628                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4629                          dev_priv->rps.efficient_freq);
4630
4631         dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
4632         DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
4633                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
4634                          dev_priv->rps.rp1_freq);
4635
4636         dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
4637         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
4638                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
4639                          dev_priv->rps.min_freq);
4640
4641         /* Preserve min/max settings in case of re-init */
4642         if (dev_priv->rps.max_freq_softlimit == 0)
4643                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4644
4645         if (dev_priv->rps.min_freq_softlimit == 0)
4646                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
4647
4648         mutex_unlock(&dev_priv->rps.hw_lock);
4649 }
4650
4651 static void cherryview_init_gt_powersave(struct drm_device *dev)
4652 {
4653         struct drm_i915_private *dev_priv = dev->dev_private;
4654         u32 val;
4655
4656         cherryview_setup_pctx(dev);
4657
4658         mutex_lock(&dev_priv->rps.hw_lock);
4659
4660         mutex_lock(&dev_priv->dpio_lock);
4661         val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
4662         mutex_unlock(&dev_priv->dpio_lock);
4663
4664         switch ((val >> 2) & 0x7) {
4665         case 0:
4666         case 1:
4667                 dev_priv->rps.cz_freq = 200;
4668                 dev_priv->mem_freq = 1600;
4669                 break;
4670         case 2:
4671                 dev_priv->rps.cz_freq = 267;
4672                 dev_priv->mem_freq = 1600;
4673                 break;
4674         case 3:
4675                 dev_priv->rps.cz_freq = 333;
4676                 dev_priv->mem_freq = 2000;
4677                 break;
4678         case 4:
4679                 dev_priv->rps.cz_freq = 320;
4680                 dev_priv->mem_freq = 1600;
4681                 break;
4682         case 5:
4683                 dev_priv->rps.cz_freq = 400;
4684                 dev_priv->mem_freq = 1600;
4685                 break;
4686         }
4687         DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
4688
4689         dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
4690         dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
4691         DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
4692                          intel_gpu_freq(dev_priv, dev_priv->rps.max_freq),
4693                          dev_priv->rps.max_freq);
4694
4695         dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
4696         DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
4697                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4698                          dev_priv->rps.efficient_freq);
4699
4700         dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
4701         DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
4702                          intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
4703                          dev_priv->rps.rp1_freq);
4704
4705         dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
4706         DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
4707                          intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
4708                          dev_priv->rps.min_freq);
4709
4710         WARN_ONCE((dev_priv->rps.max_freq |
4711                    dev_priv->rps.efficient_freq |
4712                    dev_priv->rps.rp1_freq |
4713                    dev_priv->rps.min_freq) & 1,
4714                   "Odd GPU freq values\n");
4715
4716         /* Preserve min/max settings in case of re-init */
4717         if (dev_priv->rps.max_freq_softlimit == 0)
4718                 dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
4719
4720         if (dev_priv->rps.min_freq_softlimit == 0)
4721                 dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
4722
4723         mutex_unlock(&dev_priv->rps.hw_lock);
4724 }
4725
4726 static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
4727 {
4728         valleyview_cleanup_pctx(dev);
4729 }
4730
4731 static void cherryview_enable_rps(struct drm_device *dev)
4732 {
4733         struct drm_i915_private *dev_priv = dev->dev_private;
4734         struct intel_engine_cs *ring;
4735         u32 gtfifodbg, val, rc6_mode = 0, pcbr;
4736         int i;
4737
4738         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4739
4740         gtfifodbg = I915_READ(GTFIFODBG);
4741         if (gtfifodbg) {
4742                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
4743                                  gtfifodbg);
4744                 I915_WRITE(GTFIFODBG, gtfifodbg);
4745         }
4746
4747         cherryview_check_pctx(dev_priv);
4748
4749         /* 1a & 1b: Get forcewake during program sequence. Although the driver
4750          * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
4751         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4752
4753         /*  Disable RC states. */
4754         I915_WRITE(GEN6_RC_CONTROL, 0);
4755
4756         /* 2a: Program RC6 thresholds.*/
4757         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
4758         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
4759         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
4760
4761         for_each_ring(ring, dev_priv, i)
4762                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4763         I915_WRITE(GEN6_RC_SLEEP, 0);
4764
4765         /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
4766         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
4767
4768         /* allows RC6 residency counter to work */
4769         I915_WRITE(VLV_COUNTER_CONTROL,
4770                    _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
4771                                       VLV_MEDIA_RC6_COUNT_EN |
4772                                       VLV_RENDER_RC6_COUNT_EN));
4773
4774         /* For now we assume BIOS is allocating and populating the PCBR  */
4775         pcbr = I915_READ(VLV_PCBR);
4776
4777         /* 3: Enable RC6 */
4778         if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
4779                                                 (pcbr >> VLV_PCBR_ADDR_SHIFT))
4780                 rc6_mode = GEN7_RC_CTL_TO_MODE;
4781
4782         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
4783
4784         /* 4: Program defaults and thresholds for RPS */
4785         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
4786         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
4787         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
4788         I915_WRITE(GEN6_RP_UP_EI, 66000);
4789         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
4790
4791         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4792
4793         /* 5: Enable RPS */
4794         I915_WRITE(GEN6_RP_CONTROL,
4795                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4796                    GEN6_RP_MEDIA_IS_GFX |
4797                    GEN6_RP_ENABLE |
4798                    GEN6_RP_UP_BUSY_AVG |
4799                    GEN6_RP_DOWN_IDLE_AVG);
4800
4801         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4802
4803         /* RPS code assumes GPLL is used */
4804         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
4805
4806         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
4807         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
4808
4809         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
4810         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
4811                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
4812                          dev_priv->rps.cur_freq);
4813
4814         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
4815                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4816                          dev_priv->rps.efficient_freq);
4817
4818         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
4819
4820         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4821 }
4822
4823 static void valleyview_enable_rps(struct drm_device *dev)
4824 {
4825         struct drm_i915_private *dev_priv = dev->dev_private;
4826         struct intel_engine_cs *ring;
4827         u32 gtfifodbg, val, rc6_mode = 0;
4828         int i;
4829
4830         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
4831
4832         valleyview_check_pctx(dev_priv);
4833
4834         if ((gtfifodbg = I915_READ(GTFIFODBG))) {
4835                 DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
4836                                  gtfifodbg);
4837                 I915_WRITE(GTFIFODBG, gtfifodbg);
4838         }
4839
4840         /* If VLV, Forcewake all wells, else re-direct to regular path */
4841         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4842
4843         /*  Disable RC states. */
4844         I915_WRITE(GEN6_RC_CONTROL, 0);
4845
4846         I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
4847         I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
4848         I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
4849         I915_WRITE(GEN6_RP_UP_EI, 66000);
4850         I915_WRITE(GEN6_RP_DOWN_EI, 350000);
4851
4852         I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
4853
4854         I915_WRITE(GEN6_RP_CONTROL,
4855                    GEN6_RP_MEDIA_TURBO |
4856                    GEN6_RP_MEDIA_HW_NORMAL_MODE |
4857                    GEN6_RP_MEDIA_IS_GFX |
4858                    GEN6_RP_ENABLE |
4859                    GEN6_RP_UP_BUSY_AVG |
4860                    GEN6_RP_DOWN_IDLE_CONT);
4861
4862         I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
4863         I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
4864         I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
4865
4866         for_each_ring(ring, dev_priv, i)
4867                 I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
4868
4869         I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
4870
4871         /* allows RC6 residency counter to work */
4872         I915_WRITE(VLV_COUNTER_CONTROL,
4873                    _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
4874                                       VLV_RENDER_RC0_COUNT_EN |
4875                                       VLV_MEDIA_RC6_COUNT_EN |
4876                                       VLV_RENDER_RC6_COUNT_EN));
4877
4878         if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
4879                 rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
4880
4881         intel_print_rc6_info(dev, rc6_mode);
4882
4883         I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
4884
4885         val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
4886
4887         /* RPS code assumes GPLL is used */
4888         WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
4889
4890         DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
4891         DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
4892
4893         dev_priv->rps.cur_freq = (val >> 8) & 0xff;
4894         DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
4895                          intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
4896                          dev_priv->rps.cur_freq);
4897
4898         DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
4899                          intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
4900                          dev_priv->rps.efficient_freq);
4901
4902         valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
4903
4904         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
4905 }
4906
4907 void ironlake_teardown_rc6(struct drm_device *dev)
4908 {
4909         struct drm_i915_private *dev_priv = dev->dev_private;
4910
4911         if (dev_priv->ips.renderctx) {
4912                 i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
4913                 drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
4914                 dev_priv->ips.renderctx = NULL;
4915         }
4916
4917         if (dev_priv->ips.pwrctx) {
4918                 i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
4919                 drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
4920                 dev_priv->ips.pwrctx = NULL;
4921         }
4922 }
4923
4924 static void ironlake_disable_rc6(struct drm_device *dev)
4925 {
4926         struct drm_i915_private *dev_priv = dev->dev_private;
4927
4928         if (I915_READ(PWRCTXA)) {
4929                 /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
4930                 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
4931                 wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
4932                          50);
4933
4934                 I915_WRITE(PWRCTXA, 0);
4935                 POSTING_READ(PWRCTXA);
4936
4937                 I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
4938                 POSTING_READ(RSTDBYCTL);
4939         }
4940 }
4941
4942 static int ironlake_setup_rc6(struct drm_device *dev)
4943 {
4944         struct drm_i915_private *dev_priv = dev->dev_private;
4945
4946         if (dev_priv->ips.renderctx == NULL)
4947                 dev_priv->ips.renderctx = intel_alloc_context_page(dev);
4948         if (!dev_priv->ips.renderctx)
4949                 return -ENOMEM;
4950
4951         if (dev_priv->ips.pwrctx == NULL)
4952                 dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
4953         if (!dev_priv->ips.pwrctx) {
4954                 ironlake_teardown_rc6(dev);
4955                 return -ENOMEM;
4956         }
4957
4958         return 0;
4959 }
4960
4961 static void ironlake_enable_rc6(struct drm_device *dev)
4962 {
4963         struct drm_i915_private *dev_priv = dev->dev_private;
4964         struct intel_engine_cs *ring = &dev_priv->ring[RCS];
4965         bool was_interruptible;
4966         int ret;
4967
4968         /* rc6 disabled by default due to repeated reports of hanging during
4969          * boot and resume.
4970          */
4971         if (!intel_enable_rc6(dev))
4972                 return;
4973
4974         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
4975
4976         ret = ironlake_setup_rc6(dev);
4977         if (ret)
4978                 return;
4979
4980         was_interruptible = dev_priv->mm.interruptible;
4981         dev_priv->mm.interruptible = false;
4982
4983         /*
4984          * GPU can automatically power down the render unit if given a page
4985          * to save state.
4986          */
4987         ret = intel_ring_begin(ring, 6);
4988         if (ret) {
4989                 ironlake_teardown_rc6(dev);
4990                 dev_priv->mm.interruptible = was_interruptible;
4991                 return;
4992         }
4993
4994         intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
4995         intel_ring_emit(ring, MI_SET_CONTEXT);
4996         intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
4997                         MI_MM_SPACE_GTT |
4998                         MI_SAVE_EXT_STATE_EN |
4999                         MI_RESTORE_EXT_STATE_EN |
5000                         MI_RESTORE_INHIBIT);
5001         intel_ring_emit(ring, MI_SUSPEND_FLUSH);
5002         intel_ring_emit(ring, MI_NOOP);
5003         intel_ring_emit(ring, MI_FLUSH);
5004         intel_ring_advance(ring);
5005
5006         /*
5007          * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
5008          * does an implicit flush; combined with the MI_FLUSH above, it should be
5009          * safe to assume that renderctx is valid.
5010          */
5011         ret = intel_ring_idle(ring);
5012         dev_priv->mm.interruptible = was_interruptible;
5013         if (ret) {
5014                 DRM_ERROR("failed to enable ironlake power savings\n");
5015                 ironlake_teardown_rc6(dev);
5016                 return;
5017         }
5018
5019         I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
5020         I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
5021
5022         intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
5023 }
5024
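/*
 * Decode a PXVFREQ-style fuse value (multiplier plus pre/post dividers) into
 * a frequency; the 133333 constant suggests a 133.33 MHz reference with the
 * result in kHz, which matches intel_init_emon() dividing it by 1000 below.
 */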
5025 static unsigned long intel_pxfreq(u32 vidfreq)
5026 {
5027         unsigned long freq;
5028         int div = (vidfreq & 0x3f0000) >> 16;
5029         int post = (vidfreq & 0x3000) >> 12;
5030         int pre = (vidfreq & 0x7);
5031
5032         if (!pre)
5033                 return 0;
5034
5035         freq = ((div * 133333) / ((1<<post) * pre));
5036
5037         return freq;
5038 }
5039
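/*
 * (i, t) -> (m, c) coefficient table for the chipset power estimate in
 * __i915_chipset_val() below; the entry is selected by matching the stored
 * ips.c_m / ips.r_t configuration.
 */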
5040 static const struct cparams {
5041         u16 i;
5042         u16 t;
5043         u16 m;
5044         u16 c;
5045 } cparams[] = {
5046         { 1, 1333, 301, 28664 },
5047         { 1, 1066, 294, 24460 },
5048         { 1, 800, 294, 25192 },
5049         { 0, 1333, 276, 27605 },
5050         { 0, 1066, 276, 27605 },
5051         { 0, 800, 231, 23784 },
5052 };
5053
5054 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
5055 {
5056         u64 total_count, diff, ret;
5057         u32 count1, count2, count3, m = 0, c = 0;
5058         unsigned long now = jiffies_to_msecs(jiffies), diff1;
5059         int i;
5060
5061         assert_spin_locked(&mchdev_lock);
5062
5063         diff1 = now - dev_priv->ips.last_time1;
5064
5065         /* Prevent division-by-zero if we are asking too fast.
5066          * Also, we don't get interesting results if we are polling
5067          * faster than once in 10ms, so just return the saved value
5068          * in such cases.
5069          */
5070         if (diff1 <= 10)
5071                 return dev_priv->ips.chipset_power;
5072
5073         count1 = I915_READ(DMIEC);
5074         count2 = I915_READ(DDREC);
5075         count3 = I915_READ(CSIEC);
5076
5077         total_count = count1 + count2 + count3;
5078
5079         /* FIXME: handle per-counter overflow */
5080         if (total_count < dev_priv->ips.last_count1) {
5081                 diff = ~0UL - dev_priv->ips.last_count1;
5082                 diff += total_count;
5083         } else {
5084                 diff = total_count - dev_priv->ips.last_count1;
5085         }
5086
5087         for (i = 0; i < ARRAY_SIZE(cparams); i++) {
5088                 if (cparams[i].i == dev_priv->ips.c_m &&
5089                     cparams[i].t == dev_priv->ips.r_t) {
5090                         m = cparams[i].m;
5091                         c = cparams[i].c;
5092                         break;
5093                 }
5094         }
5095
5096         diff = div_u64(diff, diff1);
5097         ret = ((m * diff) + c);
5098         ret = div_u64(ret, 10);
5099
5100         dev_priv->ips.last_count1 = total_count;
5101         dev_priv->ips.last_time1 = now;
5102
5103         dev_priv->ips.chipset_power = ret;
5104
5105         return ret;
5106 }
5107
5108 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
5109 {
5110         struct drm_device *dev = dev_priv->dev;
5111         unsigned long val;
5112
5113         if (INTEL_INFO(dev)->gen != 5)
5114                 return 0;
5115
5116         spin_lock_irq(&mchdev_lock);
5117
5118         val = __i915_chipset_val(dev_priv);
5119
5120         spin_unlock_irq(&mchdev_lock);
5121
5122         return val;
5123 }
5124
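/*
 * Combine the TSFS slope/intercept with the TR1 reading; judging by the
 * 50/80 thresholds in __i915_gfx_val(), the result is treated as a
 * temperature.
 */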
5125 unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
5126 {
5127         unsigned long m, x, b;
5128         u32 tsfs;
5129
5130         tsfs = I915_READ(TSFS);
5131
5132         m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
5133         x = I915_READ8(TR1);
5134
5135         b = tsfs & TSFS_INTR_MASK;
5136
5137         return ((m * x) / 127) - b;
5138 }
5139
5140 static int _pxvid_to_vd(u8 pxvid)
5141 {
5142         if (pxvid == 0)
5143                 return 0;
5144
5145         if (pxvid >= 8 && pxvid < 31)
5146                 pxvid = 31;
5147
5148         return (pxvid + 2) * 125;
5149 }
5150
5151 static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
5152 {
5153         struct drm_device *dev = dev_priv->dev;
5154         const int vd = _pxvid_to_vd(pxvid);
5155         const int vm = vd - 1125;
5156
5157         if (INTEL_INFO(dev)->is_mobile)
5158                 return vm > 0 ? vm : 0;
5159
5160         return vd;
5161 }
5162
5163 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
5164 {
5165         u64 now, diff, diffms;
5166         u32 count;
5167
5168         assert_spin_locked(&mchdev_lock);
5169
5170         now = ktime_get_raw_ns();
5171         diffms = now - dev_priv->ips.last_time2;
5172         do_div(diffms, NSEC_PER_MSEC);
5173
5174         /* Don't divide by 0 */
5175         if (!diffms)
5176                 return;
5177
5178         count = I915_READ(GFXEC);
5179
5180         if (count < dev_priv->ips.last_count2) {
5181                 diff = ~0UL - dev_priv->ips.last_count2;
5182                 diff += count;
5183         } else {
5184                 diff = count - dev_priv->ips.last_count2;
5185         }
5186
5187         dev_priv->ips.last_count2 = count;
5188         dev_priv->ips.last_time2 = now;
5189
5190         /* More magic constants... */
5191         diff = diff * 1181;
5192         diff = div_u64(diff, diffms * 10);
5193         dev_priv->ips.gfx_power = diff;
5194 }
5195
5196 void i915_update_gfx_val(struct drm_i915_private *dev_priv)
5197 {
5198         struct drm_device *dev = dev_priv->dev;
5199
5200         if (INTEL_INFO(dev)->gen != 5)
5201                 return;
5202
5203         spin_lock_irq(&mchdev_lock);
5204
5205         __i915_update_gfx_val(dev_priv);
5206
5207         spin_unlock_irq(&mchdev_lock);
5208 }
5209
5210 static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
5211 {
5212         unsigned long t, corr, state1, corr2, state2;
5213         u32 pxvid, ext_v;
5214
5215         assert_spin_locked(&mchdev_lock);
5216
5217         pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4));
5218         pxvid = (pxvid >> 24) & 0x7f;
5219         ext_v = pvid_to_extvid(dev_priv, pxvid);
5220
5221         state1 = ext_v;
5222
5223         t = i915_mch_val(dev_priv);
5224
5225         /* Revel in the empirically derived constants */
5226
5227         /* Correction factor in 1/100000 units */
5228         if (t > 80)
5229                 corr = ((t * 2349) + 135940);
5230         else if (t >= 50)
5231                 corr = ((t * 964) + 29317);
5232         else /* < 50 */
5233                 corr = ((t * 301) + 1004);
5234
5235         corr = corr * ((150142 * state1) / 10000 - 78642);
5236         corr /= 100000;
5237         corr2 = (corr * dev_priv->ips.corr);
5238
5239         state2 = (corr2 * state1) / 10000;
5240         state2 /= 100; /* convert to mW */
5241
5242         __i915_update_gfx_val(dev_priv);
5243
5244         return dev_priv->ips.gfx_power + state2;
5245 }
5246
5247 unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
5248 {
5249         struct drm_device *dev = dev_priv->dev;
5250         unsigned long val;
5251
5252         if (INTEL_INFO(dev)->gen != 5)
5253                 return 0;
5254
5255         spin_lock_irq(&mchdev_lock);
5256
5257         val = __i915_gfx_val(dev_priv);
5258
5259         spin_unlock_irq(&mchdev_lock);
5260
5261         return val;
5262 }
5263
5264 /**
5265  * i915_read_mch_val - return value for IPS use
5266  *
5267  * Calculate and return a value for the IPS driver to use when deciding whether
5268  * we have thermal and power headroom to increase CPU or GPU power budget.
5269  */
5270 unsigned long i915_read_mch_val(void)
5271 {
5272         struct drm_i915_private *dev_priv;
5273         unsigned long chipset_val, graphics_val, ret = 0;
5274
5275         spin_lock_irq(&mchdev_lock);
5276         if (!i915_mch_dev)
5277                 goto out_unlock;
5278         dev_priv = i915_mch_dev;
5279
5280         chipset_val = __i915_chipset_val(dev_priv);
5281         graphics_val = __i915_gfx_val(dev_priv);
5282
5283         ret = chipset_val + graphics_val;
5284
5285 out_unlock:
5286         spin_unlock_irq(&mchdev_lock);
5287
5288         return ret;
5289 }
5290 EXPORT_SYMBOL_GPL(i915_read_mch_val);
5291
5292 /**
5293  * i915_gpu_raise - raise GPU frequency limit
5294  *
5295  * Raise the limit; IPS indicates we have thermal headroom.
5296  */
5297 bool i915_gpu_raise(void)
5298 {
5299         struct drm_i915_private *dev_priv;
5300         bool ret = true;
5301
5302         spin_lock_irq(&mchdev_lock);
5303         if (!i915_mch_dev) {
5304                 ret = false;
5305                 goto out_unlock;
5306         }
5307         dev_priv = i915_mch_dev;
5308
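        /*
         * Delay values run opposite to frequency: decrementing max_delay
         * towards fmax raises the allowed frequency ceiling.
         */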
5309         if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
5310                 dev_priv->ips.max_delay--;
5311
5312 out_unlock:
5313         spin_unlock_irq(&mchdev_lock);
5314
5315         return ret;
5316 }
5317 EXPORT_SYMBOL_GPL(i915_gpu_raise);
5318
5319 /**
5320  * i915_gpu_lower - lower GPU frequency limit
5321  *
5322  * IPS indicates we're close to a thermal limit, so throttle back the GPU
5323  * frequency maximum.
5324  */
5325 bool i915_gpu_lower(void)
5326 {
5327         struct drm_i915_private *dev_priv;
5328         bool ret = true;
5329
5330         spin_lock_irq(&mchdev_lock);
5331         if (!i915_mch_dev) {
5332                 ret = false;
5333                 goto out_unlock;
5334         }
5335         dev_priv = i915_mch_dev;
5336
5337         if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
5338                 dev_priv->ips.max_delay++;
5339
5340 out_unlock:
5341         spin_unlock_irq(&mchdev_lock);
5342
5343         return ret;
5344 }
5345 EXPORT_SYMBOL_GPL(i915_gpu_lower);
5346
5347 /**
5348  * i915_gpu_busy - indicate GPU busyness to IPS
5349  *
5350  * Tell the IPS driver whether or not the GPU is busy.
5351  */
5352 bool i915_gpu_busy(void)
5353 {
5354         struct drm_i915_private *dev_priv;
5355         struct intel_engine_cs *ring;
5356         bool ret = false;
5357         int i;
5358
5359         spin_lock_irq(&mchdev_lock);
5360         if (!i915_mch_dev)
5361                 goto out_unlock;
5362         dev_priv = i915_mch_dev;
5363
5364         for_each_ring(ring, dev_priv, i)
5365                 ret |= !list_empty(&ring->request_list);
5366
5367 out_unlock:
5368         spin_unlock_irq(&mchdev_lock);
5369
5370         return ret;
5371 }
5372 EXPORT_SYMBOL_GPL(i915_gpu_busy);
5373
5374 /**
5375  * i915_gpu_turbo_disable - disable graphics turbo
5376  *
5377  * Disable graphics turbo by resetting the max frequency and setting the
5378  * current frequency to the default.
5379  */
5380 bool i915_gpu_turbo_disable(void)
5381 {
5382         struct drm_i915_private *dev_priv;
5383         bool ret = true;
5384
5385         spin_lock_irq(&mchdev_lock);
5386         if (!i915_mch_dev) {
5387                 ret = false;
5388                 goto out_unlock;
5389         }
5390         dev_priv = i915_mch_dev;
5391
5392         dev_priv->ips.max_delay = dev_priv->ips.fstart;
5393
5394         if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
5395                 ret = false;
5396
5397 out_unlock:
5398         spin_unlock_irq(&mchdev_lock);
5399
5400         return ret;
5401 }
5402 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
5403
5404 /**
5405  * Tells the intel_ips driver that the i915 driver is now loaded, if
5406  * IPS got loaded first.
5407  *
5408  * This awkward dance is so that neither module has to depend on the
5409  * other in order for IPS to do the appropriate communication of
5410  * GPU turbo limits to i915.
5411  */
5412 static void
5413 ips_ping_for_i915_load(void)
5414 {
5415         void (*link)(void);
5416
5417         link = symbol_get(ips_link_to_i915_driver);
5418         if (link) {
5419                 link();
5420                 symbol_put(ips_link_to_i915_driver);
5421         }
5422 }
5423
5424 void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
5425 {
5426         /* We only register the i915 ips part with intel-ips once everything is
5427          * set up, to avoid intel-ips sneaking in and reading bogus values. */
5428         spin_lock_irq(&mchdev_lock);
5429         i915_mch_dev = dev_priv;
5430         spin_unlock_irq(&mchdev_lock);
5431
5432         ips_ping_for_i915_load();
5433 }
5434
5435 void intel_gpu_ips_teardown(void)
5436 {
5437         spin_lock_irq(&mchdev_lock);
5438         i915_mch_dev = NULL;
5439         spin_unlock_irq(&mchdev_lock);
5440 }
5441
5442 static void intel_init_emon(struct drm_device *dev)
5443 {
5444         struct drm_i915_private *dev_priv = dev->dev_private;
5445         u32 lcfuse;
5446         u8 pxw[16];
5447         int i;
5448
5449         /* Disable to program */
5450         I915_WRITE(ECR, 0);
5451         POSTING_READ(ECR);
5452
5453         /* Program energy weights for various events */
5454         I915_WRITE(SDEW, 0x15040d00);
5455         I915_WRITE(CSIEW0, 0x007f0000);
5456         I915_WRITE(CSIEW1, 0x1e220004);
5457         I915_WRITE(CSIEW2, 0x04000004);
5458
5459         for (i = 0; i < 5; i++)
5460                 I915_WRITE(PEW + (i * 4), 0);
5461         for (i = 0; i < 3; i++)
5462                 I915_WRITE(DEW + (i * 4), 0);
5463
5464         /* Program P-state weights to account for frequency power adjustment */
5465         for (i = 0; i < 16; i++) {
5466                 u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
5467                 unsigned long freq = intel_pxfreq(pxvidfreq);
5468                 unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
5469                         PXVFREQ_PX_SHIFT;
5470                 unsigned long val;
5471
5472                 val = vid * vid;
5473                 val *= (freq / 1000);
5474                 val *= 255;
5475                 val /= (127*127*900);
5476                 if (val > 0xff)
5477                         DRM_ERROR("bad pxval: %ld\n", val);
5478                 pxw[i] = val;
5479         }
5480         /* Render standby states get 0 weight */
5481         pxw[14] = 0;
5482         pxw[15] = 0;
5483
5484         for (i = 0; i < 4; i++) {
5485                 u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
5486                         (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
5487                 I915_WRITE(PXW + (i * 4), val);
5488         }
5489
5490         /* Adjust magic regs to magic values (more experimental results) */
5491         I915_WRITE(OGW0, 0);
5492         I915_WRITE(OGW1, 0);
5493         I915_WRITE(EG0, 0x00007f00);
5494         I915_WRITE(EG1, 0x0000000e);
5495         I915_WRITE(EG2, 0x000e0000);
5496         I915_WRITE(EG3, 0x68000300);
5497         I915_WRITE(EG4, 0x42000000);
5498         I915_WRITE(EG5, 0x00140031);
5499         I915_WRITE(EG6, 0);
5500         I915_WRITE(EG7, 0);
5501
5502         for (i = 0; i < 8; i++)
5503                 I915_WRITE(PXWL + (i * 4), 0);
5504
5505         /* Enable PMON + select events */
5506         I915_WRITE(ECR, 0x80000019);
5507
5508         lcfuse = I915_READ(LCFUSE02);
5509
5510         dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
5511 }
5512
5513 void intel_init_gt_powersave(struct drm_device *dev)
5514 {
5515         i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
5516
5517         if (IS_CHERRYVIEW(dev))
5518                 cherryview_init_gt_powersave(dev);
5519         else if (IS_VALLEYVIEW(dev))
5520                 valleyview_init_gt_powersave(dev);
5521 }
5522
5523 void intel_cleanup_gt_powersave(struct drm_device *dev)
5524 {
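        /*
         * CHV never allocates a pctx GEM object (cherryview_setup_pctx()
         * only fixes up PCBR), so there is nothing to release here.
         */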
5525         if (IS_CHERRYVIEW(dev))
5526                 return;
5527         else if (IS_VALLEYVIEW(dev))
5528                 valleyview_cleanup_gt_powersave(dev);
5529 }
5530
5531 static void gen6_suspend_rps(struct drm_device *dev)
5532 {
5533         struct drm_i915_private *dev_priv = dev->dev_private;
5534
5535         flush_delayed_work(&dev_priv->rps.delayed_resume_work);
5536
5537         /*
5538          * TODO: disable RPS interrupts on GEN9+ too once RPS support
5539          * is added for it.
5540          */
5541         if (INTEL_INFO(dev)->gen < 9)
5542                 gen6_disable_rps_interrupts(dev);
5543 }
5544
5545 /**
5546  * intel_suspend_gt_powersave - suspend PM work and helper threads
5547  * @dev: drm device
5548  *
5549  * We don't want to disable RC6 or other features here, we just want
5550  * to make sure any work we've queued has finished and won't bother
5551  * us while we're suspended.
5552  */
5553 void intel_suspend_gt_powersave(struct drm_device *dev)
5554 {
5555         struct drm_i915_private *dev_priv = dev->dev_private;
5556
5557         if (INTEL_INFO(dev)->gen < 6)
5558                 return;
5559
5560         gen6_suspend_rps(dev);
5561
5562         /* Force GPU to min freq during suspend */
5563         gen6_rps_idle(dev_priv);
5564 }
5565
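     /*
      * Unlike intel_suspend_gt_powersave() above, this actually tears the
      * power-saving features down: DRPS/RC6 on Ironlake-M, or RPS via the
      * per-platform disable path (under rps.hw_lock) on gen6+.
      */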
5566 void intel_disable_gt_powersave(struct drm_device *dev)
5567 {
5568         struct drm_i915_private *dev_priv = dev->dev_private;
5569
5570         if (IS_IRONLAKE_M(dev)) {
5571                 ironlake_disable_drps(dev);
5572                 ironlake_disable_rc6(dev);
5573         } else if (INTEL_INFO(dev)->gen >= 6) {
5574                 intel_suspend_gt_powersave(dev);
5575
5576                 mutex_lock(&dev_priv->rps.hw_lock);
5577                 if (INTEL_INFO(dev)->gen >= 9)
5578                         gen9_disable_rps(dev);
5579                 else if (IS_CHERRYVIEW(dev))
5580                         cherryview_disable_rps(dev);
5581                 else if (IS_VALLEYVIEW(dev))
5582                         valleyview_disable_rps(dev);
5583                 else
5584                         gen6_disable_rps(dev);
5585
5586                 dev_priv->rps.enabled = false;
5587                 mutex_unlock(&dev_priv->rps.hw_lock);
5588         }
5589 }
5590
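     /*
      * Deferred RPS/RC6 enabling, scheduled from intel_enable_gt_powersave()
      * below so that the slow PCU programming stays off the init/resume fast
      * path.  Drops the runtime PM reference that was taken when the work was
      * queued.
      */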
5591 static void intel_gen6_powersave_work(struct work_struct *work)
5592 {
5593         struct drm_i915_private *dev_priv =
5594                 container_of(work, struct drm_i915_private,
5595                              rps.delayed_resume_work.work);
5596         struct drm_device *dev = dev_priv->dev;
5597
5598         mutex_lock(&dev_priv->rps.hw_lock);
5599
5600         /*
5601          * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is
5602          * added for it.
5603          */
5604         if (INTEL_INFO(dev)->gen < 9)
5605                 gen6_reset_rps_interrupts(dev);
5606
5607         if (IS_CHERRYVIEW(dev)) {
5608                 cherryview_enable_rps(dev);
5609         } else if (IS_VALLEYVIEW(dev)) {
5610                 valleyview_enable_rps(dev);
5611         } else if (INTEL_INFO(dev)->gen >= 9) {
5612                 gen9_enable_rc6(dev);
5613                 gen9_enable_rps(dev);
5614                 __gen6_update_ring_freq(dev);
5615         } else if (IS_BROADWELL(dev)) {
5616                 gen8_enable_rps(dev);
5617                 __gen6_update_ring_freq(dev);
5618         } else {
5619                 gen6_enable_rps(dev);
5620                 __gen6_update_ring_freq(dev);
5621         }
5622         dev_priv->rps.enabled = true;
5623
5624         if (INTEL_INFO(dev)->gen < 9)
5625                 gen6_enable_rps_interrupts(dev);
5626
5627         mutex_unlock(&dev_priv->rps.hw_lock);
5628
5629         intel_runtime_pm_put(dev_priv);
5630 }
5631
5632 void intel_enable_gt_powersave(struct drm_device *dev)
5633 {
5634         struct drm_i915_private *dev_priv = dev->dev_private;
5635
5636         if (IS_IRONLAKE_M(dev)) {
5637                 mutex_lock(&dev->struct_mutex);
5638                 ironlake_enable_drps(dev);
5639                 ironlake_enable_rc6(dev);
5640                 intel_init_emon(dev);
5641                 mutex_unlock(&dev->struct_mutex);
5642         } else if (INTEL_INFO(dev)->gen >= 6) {
5643                 /*
5644                  * PCU communication is slow and this doesn't need to be
5645                  * done at any specific time, so do this out of our fast path
5646                  * to make resume and init faster.
5647                  *
5648                  * We depend on the HW RC6 power context save/restore
5649                  * mechanism when entering D3 through runtime PM suspend. So
5650                  * disable RPM until RPS/RC6 is properly setup. We can only
5651                  * get here via the driver load/system resume/runtime resume
5652                  * paths, so the _noresume version is enough (and in case of
5653                  * runtime resume it's necessary).
5654                  */
5655                 if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
5656                                            round_jiffies_up_relative(HZ)))
5657                         intel_runtime_pm_get_noresume(dev_priv);
5658         }
5659 }
5660
5661 void intel_reset_gt_powersave(struct drm_device *dev)
5662 {
5663         struct drm_i915_private *dev_priv = dev->dev_private;
5664
5665         if (INTEL_INFO(dev)->gen < 6)
5666                 return;
5667
5668         gen6_suspend_rps(dev);
5669         dev_priv->rps.enabled = false;
5670 }
5671
5672 static void ibx_init_clock_gating(struct drm_device *dev)
5673 {
5674         struct drm_i915_private *dev_priv = dev->dev_private;
5675
5676         /*
5677          * On Ibex Peak and Cougar Point, we need to disable clock
5678          * gating for the panel power sequencer or it will fail to
5679          * start up when no ports are active.
5680          */
5681         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
5682 }
5683
5684 static void g4x_disable_trickle_feed(struct drm_device *dev)
5685 {
5686         struct drm_i915_private *dev_priv = dev->dev_private;
5687         int pipe;
5688
5689         for_each_pipe(dev_priv, pipe) {
5690                 I915_WRITE(DSPCNTR(pipe),
5691                            I915_READ(DSPCNTR(pipe)) |
5692                            DISPPLANE_TRICKLE_FEED_DISABLE);
5693                 intel_flush_primary_plane(dev_priv, pipe);
5694         }
5695 }
5696
5697 static void ilk_init_lp_watermarks(struct drm_device *dev)
5698 {
5699         struct drm_i915_private *dev_priv = dev->dev_private;
5700
5701         I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
5702         I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
5703         I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
5704
5705         /*
5706          * Don't touch WM1S_LP_EN here.
5707          * Doing so could cause underruns.
5708          */
5709 }
5710
5711 static void ironlake_init_clock_gating(struct drm_device *dev)
5712 {
5713         struct drm_i915_private *dev_priv = dev->dev_private;
5714         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
5715
5716         /*
5717          * Required for FBC
5718          * WaFbcDisableDpfcClockGating:ilk
5719          */
5720         dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
5721                    ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
5722                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
5723
5724         I915_WRITE(PCH_3DCGDIS0,
5725                    MARIUNIT_CLOCK_GATE_DISABLE |
5726                    SVSMUNIT_CLOCK_GATE_DISABLE);
5727         I915_WRITE(PCH_3DCGDIS1,
5728                    VFMUNIT_CLOCK_GATE_DISABLE);
5729
5730         /*
5731          * According to the spec the following bits should be set in
5732          * order to enable memory self-refresh
5733          * The bit 22/21 of 0x42004
5734          * The bit 5 of 0x42020
5735          * The bit 15 of 0x45000
5736          */
5737         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5738                    (I915_READ(ILK_DISPLAY_CHICKEN2) |
5739                     ILK_DPARB_GATE | ILK_VSDPFD_FULL));
5740         dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
5741         I915_WRITE(DISP_ARB_CTL,
5742                    (I915_READ(DISP_ARB_CTL) |
5743                     DISP_FBC_WM_DIS));
5744
5745         ilk_init_lp_watermarks(dev);
5746
5747         /*
5748          * Based on the document from the hardware guys, the following
5749          * bits should be set unconditionally in order to enable FBC.
5750          * The bit 22 of 0x42000
5751          * The bit 22 of 0x42004
5752          * The bit 7,8,9 of 0x42020.
5753          */
5754         if (IS_IRONLAKE_M(dev)) {
5755                 /* WaFbcAsynchFlipDisableFbcQueue:ilk */
5756                 I915_WRITE(ILK_DISPLAY_CHICKEN1,
5757                            I915_READ(ILK_DISPLAY_CHICKEN1) |
5758                            ILK_FBCQ_DIS);
5759                 I915_WRITE(ILK_DISPLAY_CHICKEN2,
5760                            I915_READ(ILK_DISPLAY_CHICKEN2) |
5761                            ILK_DPARB_GATE);
5762         }
5763
5764         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
5765
5766         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5767                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5768                    ILK_ELPIN_409_SELECT);
5769         I915_WRITE(_3D_CHICKEN2,
5770                    _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
5771                    _3D_CHICKEN2_WM_READ_PIPELINED);
5772
5773         /* WaDisableRenderCachePipelinedFlush:ilk */
5774         I915_WRITE(CACHE_MODE_0,
5775                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
5776
5777         /* WaDisable_RenderCache_OperationalFlush:ilk */
5778         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
5779
5780         g4x_disable_trickle_feed(dev);
5781
5782         ibx_init_clock_gating(dev);
5783 }
5784
5785 static void cpt_init_clock_gating(struct drm_device *dev)
5786 {
5787         struct drm_i915_private *dev_priv = dev->dev_private;
5788         int pipe;
5789         uint32_t val;
5790
5791         /*
5792          * On Ibex Peak and Cougar Point, we need to disable clock
5793          * gating for the panel power sequencer or it will fail to
5794          * start up when no ports are active.
5795          */
5796         I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
5797                    PCH_DPLUNIT_CLOCK_GATE_DISABLE |
5798                    PCH_CPUNIT_CLOCK_GATE_DISABLE);
5799         I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
5800                    DPLS_EDP_PPS_FIX_DIS);
5801         /* The following fixes a weird display corruption (a few pixels shifted
5802          * downward) seen only on the LVDS panels of some HP laptops with Ivy Bridge.
5803          */
5804         for_each_pipe(dev_priv, pipe) {
5805                 val = I915_READ(TRANS_CHICKEN2(pipe));
5806                 val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
5807                 val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
5808                 if (dev_priv->vbt.fdi_rx_polarity_inverted)
5809                         val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
5810                 val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
5811                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
5812                 val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
5813                 I915_WRITE(TRANS_CHICKEN2(pipe), val);
5814         }
5815         /* WADP0ClockGatingDisable */
5816         for_each_pipe(dev_priv, pipe) {
5817                 I915_WRITE(TRANS_CHICKEN1(pipe),
5818                            TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
5819         }
5820 }
5821
5822 static void gen6_check_mch_setup(struct drm_device *dev)
5823 {
5824         struct drm_i915_private *dev_priv = dev->dev_private;
5825         uint32_t tmp;
5826
5827         tmp = I915_READ(MCH_SSKPD);
5828         if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
5829                 DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x, this can cause underruns.\n",
5830                               tmp);
5831 }
5832
5833 static void gen6_init_clock_gating(struct drm_device *dev)
5834 {
5835         struct drm_i915_private *dev_priv = dev->dev_private;
5836         uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
5837
5838         I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
5839
5840         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5841                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5842                    ILK_ELPIN_409_SELECT);
5843
5844         /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
5845         I915_WRITE(_3D_CHICKEN,
5846                    _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
5847
5848         /* WaDisable_RenderCache_OperationalFlush:snb */
5849         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
5850
5851         /*
5852          * BSpec recommends 8x4 when MSAA is used,
5853          * however in practice 16x4 seems fastest.
5854          *
5855          * Note that PS/WM thread counts depend on the WIZ hashing
5856          * disable bit, which we don't touch here, but it's good
5857          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
5858          */
5859         I915_WRITE(GEN6_GT_MODE,
5860                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
5861
5862         ilk_init_lp_watermarks(dev);
5863
5864         I915_WRITE(CACHE_MODE_0,
5865                    _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
5866
5867         I915_WRITE(GEN6_UCGCTL1,
5868                    I915_READ(GEN6_UCGCTL1) |
5869                    GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
5870                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
5871
5872         /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
5873          * gating disable must be set.  Failure to set it results in
5874          * flickering pixels due to Z write ordering failures after
5875          * some amount of runtime in the Mesa "fire" demo, and Unigine
5876          * Sanctuary and Tropics, and apparently anything else with
5877          * alpha test or pixel discard.
5878          *
5879          * According to the spec, bit 11 (RCCUNIT) must also be set,
5880          * but we didn't debug actual testcases to find it out.
5881          *
5882          * WaDisableRCCUnitClockGating:snb
5883          * WaDisableRCPBUnitClockGating:snb
5884          */
5885         I915_WRITE(GEN6_UCGCTL2,
5886                    GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
5887                    GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
5888
5889         /* WaStripsFansDisableFastClipPerformanceFix:snb */
5890         I915_WRITE(_3D_CHICKEN3,
5891                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
5892
5893         /*
5894          * Bspec says:
5895          * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
5896          * 3DSTATE_SF number of SF output attributes is more than 16."
5897          */
5898         I915_WRITE(_3D_CHICKEN3,
5899                    _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
5900
5901         /*
5902          * According to the spec the following bits should be
5903          * set in order to enable memory self-refresh and fbc:
5904          * The bit21 and bit22 of 0x42000
5905          * The bit21 and bit22 of 0x42004
5906          * The bit5 and bit7 of 0x42020
5907          * The bit14 of 0x70180
5908          * The bit14 of 0x71180
5909          *
5910          * WaFbcAsynchFlipDisableFbcQueue:snb
5911          */
5912         I915_WRITE(ILK_DISPLAY_CHICKEN1,
5913                    I915_READ(ILK_DISPLAY_CHICKEN1) |
5914                    ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
5915         I915_WRITE(ILK_DISPLAY_CHICKEN2,
5916                    I915_READ(ILK_DISPLAY_CHICKEN2) |
5917                    ILK_DPARB_GATE | ILK_VSDPFD_FULL);
5918         I915_WRITE(ILK_DSPCLK_GATE_D,
5919                    I915_READ(ILK_DSPCLK_GATE_D) |
5920                    ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
5921                    ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
5922
5923         g4x_disable_trickle_feed(dev);
5924
5925         cpt_init_clock_gating(dev);
5926
5927         gen6_check_mch_setup(dev);
5928 }
5929
5930 static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
5931 {
5932         uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
5933
5934         /*
5935          * WaVSThreadDispatchOverride:ivb,vlv
5936          *
5937          * This actually overrides the dispatch
5938          * mode for all thread types.
5939          */
5940         reg &= ~GEN7_FF_SCHED_MASK;
5941         reg |= GEN7_FF_TS_SCHED_HW;
5942         reg |= GEN7_FF_VS_SCHED_HW;
5943         reg |= GEN7_FF_DS_SCHED_HW;
5944
5945         I915_WRITE(GEN7_FF_THREAD_MODE, reg);
5946 }
5947
5948 static void lpt_init_clock_gating(struct drm_device *dev)
5949 {
5950         struct drm_i915_private *dev_priv = dev->dev_private;
5951
5952         /*
5953          * TODO: this bit should only be enabled when really needed, then
5954          * disabled when not needed anymore in order to save power.
5955          */
5956         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
5957                 I915_WRITE(SOUTH_DSPCLK_GATE_D,
5958                            I915_READ(SOUTH_DSPCLK_GATE_D) |
5959                            PCH_LP_PARTITION_LEVEL_DISABLE);
5960
5961         /* WADPOClockGatingDisable:hsw */
5962         I915_WRITE(_TRANSA_CHICKEN1,
5963                    I915_READ(_TRANSA_CHICKEN1) |
5964                    TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
5965 }
5966
5967 static void lpt_suspend_hw(struct drm_device *dev)
5968 {
5969         struct drm_i915_private *dev_priv = dev->dev_private;
5970
5971         if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
5972                 uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
5973
5974                 val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
5975                 I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
5976         }
5977 }
5978
5979 static void broadwell_init_clock_gating(struct drm_device *dev)
5980 {
5981         struct drm_i915_private *dev_priv = dev->dev_private;
5982         enum pipe pipe;
5983
5984         I915_WRITE(WM3_LP_ILK, 0);
5985         I915_WRITE(WM2_LP_ILK, 0);
5986         I915_WRITE(WM1_LP_ILK, 0);
5987
5988         /* WaSwitchSolVfFArbitrationPriority:bdw */
5989         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
5990
5991         /* WaPsrDPAMaskVBlankInSRD:bdw */
5992         I915_WRITE(CHICKEN_PAR1_1,
5993                    I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
5994
5995         /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
5996         for_each_pipe(dev_priv, pipe) {
5997                 I915_WRITE(CHICKEN_PIPESL_1(pipe),
5998                            I915_READ(CHICKEN_PIPESL_1(pipe)) |
5999                            BDW_DPRS_MASK_VBLANK_SRD);
6000         }
6001
6002         /* WaVSRefCountFullforceMissDisable:bdw */
6003         /* WaDSRefCountFullforceMissDisable:bdw */
6004         I915_WRITE(GEN7_FF_THREAD_MODE,
6005                    I915_READ(GEN7_FF_THREAD_MODE) &
6006                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6007
6008         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6009                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6010
6011         /* WaDisableSDEUnitClockGating:bdw */
6012         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6013                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6014
6015         lpt_init_clock_gating(dev);
6016 }
6017
6018 static void haswell_init_clock_gating(struct drm_device *dev)
6019 {
6020         struct drm_i915_private *dev_priv = dev->dev_private;
6021
6022         ilk_init_lp_watermarks(dev);
6023
6024         /* L3 caching of data atomics doesn't work -- disable it. */
6025         I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
6026         I915_WRITE(HSW_ROW_CHICKEN3,
6027                    _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
6028
6029         /* This is required by WaCatErrorRejectionIssue:hsw */
6030         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6031                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6032                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6033
6034         /* WaVSRefCountFullforceMissDisable:hsw */
6035         I915_WRITE(GEN7_FF_THREAD_MODE,
6036                    I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
6037
6038         /* WaDisable_RenderCache_OperationalFlush:hsw */
6039         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6040
6041         /* enable HiZ Raw Stall Optimization */
6042         I915_WRITE(CACHE_MODE_0_GEN7,
6043                    _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6044
6045         /* WaDisable4x2SubspanOptimization:hsw */
6046         I915_WRITE(CACHE_MODE_1,
6047                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6048
6049         /*
6050          * BSpec recommends 8x4 when MSAA is used,
6051          * however in practice 16x4 seems fastest.
6052          *
6053          * Note that PS/WM thread counts depend on the WIZ hashing
6054          * disable bit, which we don't touch here, but it's good
6055          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6056          */
6057         I915_WRITE(GEN7_GT_MODE,
6058                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6059
6060         /* WaSampleCChickenBitEnable:hsw */
6061         I915_WRITE(HALF_SLICE_CHICKEN3,
6062                    _MASKED_BIT_ENABLE(HSW_SAMPLE_C_PERFORMANCE));
6063
6064         /* WaSwitchSolVfFArbitrationPriority:hsw */
6065         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
6066
6067         /* WaRsPkgCStateDisplayPMReq:hsw */
6068         I915_WRITE(CHICKEN_PAR1_1,
6069                    I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
6070
6071         lpt_init_clock_gating(dev);
6072 }
6073
6074 static void ivybridge_init_clock_gating(struct drm_device *dev)
6075 {
6076         struct drm_i915_private *dev_priv = dev->dev_private;
6077         uint32_t snpcr;
6078
6079         ilk_init_lp_watermarks(dev);
6080
6081         I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
6082
6083         /* WaDisableEarlyCull:ivb */
6084         I915_WRITE(_3D_CHICKEN3,
6085                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6086
6087         /* WaDisableBackToBackFlipFix:ivb */
6088         I915_WRITE(IVB_CHICKEN3,
6089                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6090                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6091
6092         /* WaDisablePSDDualDispatchEnable:ivb */
6093         if (IS_IVB_GT1(dev))
6094                 I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6095                            _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6096
6097         /* WaDisable_RenderCache_OperationalFlush:ivb */
6098         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6099
6100         /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
6101         I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
6102                    GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
6103
6104         /* WaApplyL3ControlAndL3ChickenMode:ivb */
6105         I915_WRITE(GEN7_L3CNTLREG1,
6106                         GEN7_WA_FOR_GEN7_L3_CONTROL);
6107         I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
6108                    GEN7_WA_L3_CHICKEN_MODE);
6109         if (IS_IVB_GT1(dev))
6110                 I915_WRITE(GEN7_ROW_CHICKEN2,
6111                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6112         else {
6113                 /* must write both registers */
6114                 I915_WRITE(GEN7_ROW_CHICKEN2,
6115                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6116                 I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
6117                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6118         }
6119
6120         /* WaForceL3Serialization:ivb */
6121         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6122                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6123
6124         /*
6125          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6126          * This implements the WaDisableRCZUnitClockGating:ivb workaround.
6127          */
6128         I915_WRITE(GEN6_UCGCTL2,
6129                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6130
6131         /* This is required by WaCatErrorRejectionIssue:ivb */
6132         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6133                         I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6134                         GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6135
6136         g4x_disable_trickle_feed(dev);
6137
6138         gen7_setup_fixed_func_scheduler(dev_priv);
6139
6140         if (0) { /* causes HiZ corruption on ivb:gt1 */
6141                 /* enable HiZ Raw Stall Optimization */
6142                 I915_WRITE(CACHE_MODE_0_GEN7,
6143                            _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
6144         }
6145
6146         /* WaDisable4x2SubspanOptimization:ivb */
6147         I915_WRITE(CACHE_MODE_1,
6148                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6149
6150         /*
6151          * BSpec recommends 8x4 when MSAA is used,
6152          * however in practice 16x4 seems fastest.
6153          *
6154          * Note that PS/WM thread counts depend on the WIZ hashing
6155          * disable bit, which we don't touch here, but it's good
6156          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6157          */
6158         I915_WRITE(GEN7_GT_MODE,
6159                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6160
6161         snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
6162         snpcr &= ~GEN6_MBC_SNPCR_MASK;
6163         snpcr |= GEN6_MBC_SNPCR_MED;
6164         I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
6165
6166         if (!HAS_PCH_NOP(dev))
6167                 cpt_init_clock_gating(dev);
6168
6169         gen6_check_mch_setup(dev);
6170 }
6171
6172 static void valleyview_init_clock_gating(struct drm_device *dev)
6173 {
6174         struct drm_i915_private *dev_priv = dev->dev_private;
6175
6176         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
6177
6178         /* WaDisableEarlyCull:vlv */
6179         I915_WRITE(_3D_CHICKEN3,
6180                    _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
6181
6182         /* WaDisableBackToBackFlipFix:vlv */
6183         I915_WRITE(IVB_CHICKEN3,
6184                    CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
6185                    CHICKEN3_DGMG_DONE_FIX_DISABLE);
6186
6187         /* WaPsdDispatchEnable:vlv */
6188         /* WaDisablePSDDualDispatchEnable:vlv */
6189         I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
6190                    _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
6191                                       GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
6192
6193         /* WaDisable_RenderCache_OperationalFlush:vlv */
6194         I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6195
6196         /* WaForceL3Serialization:vlv */
6197         I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
6198                    ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
6199
6200         /* WaDisableDopClockGating:vlv */
6201         I915_WRITE(GEN7_ROW_CHICKEN2,
6202                    _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
6203
6204         /* This is required by WaCatErrorRejectionIssue:vlv */
6205         I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
6206                    I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
6207                    GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
6208
6209         gen7_setup_fixed_func_scheduler(dev_priv);
6210
6211         /*
6212          * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
6213          * This implements the WaDisableRCZUnitClockGating:vlv workaround.
6214          */
6215         I915_WRITE(GEN6_UCGCTL2,
6216                    GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
6217
6218         /* WaDisableL3Bank2xClockGate:vlv
6219          * Disable L3 clock gating: MMIO 940c[25] = 1
6220          * (set bit 25 to disable L3_BANK_2x_CLK_GATING) */
6221         I915_WRITE(GEN7_UCGCTL4,
6222                    I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
6223
6224         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6225
6226         /*
6227          * BSpec says this must be set, even though
6228          * WaDisable4x2SubspanOptimization isn't listed for VLV.
6229          */
6230         I915_WRITE(CACHE_MODE_1,
6231                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
6232
6233         /*
6234          * BSpec recommends 8x4 when MSAA is used,
6235          * however in practice 16x4 seems fastest.
6236          *
6237          * Note that PS/WM thread counts depend on the WIZ hashing
6238          * disable bit, which we don't touch here, but it's good
6239          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
6240          */
6241         I915_WRITE(GEN7_GT_MODE,
6242                    _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
6243
6244         /*
6245          * WaIncreaseL3CreditsForVLVB0:vlv
6246          * This is the hardware default actually.
6247          */
6248         I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
6249
6250         /*
6251          * WaDisableVLVClockGating_VBIIssue:vlv
6252          * Disable clock gating on the GCFG unit to prevent a delay
6253          * in the reporting of vblank events.
6254          */
6255         I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
6256 }
6257
6258 static void cherryview_init_clock_gating(struct drm_device *dev)
6259 {
6260         struct drm_i915_private *dev_priv = dev->dev_private;
6261
6262         I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
6263
6264         I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
6265
6266         /* WaVSRefCountFullforceMissDisable:chv */
6267         /* WaDSRefCountFullforceMissDisable:chv */
6268         I915_WRITE(GEN7_FF_THREAD_MODE,
6269                    I915_READ(GEN7_FF_THREAD_MODE) &
6270                    ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
6271
6272         /* WaDisableSemaphoreAndSyncFlipWait:chv */
6273         I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
6274                    _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
6275
6276         /* WaDisableCSUnitClockGating:chv */
6277         I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
6278                    GEN6_CSUNIT_CLOCK_GATE_DISABLE);
6279
6280         /* WaDisableSDEUnitClockGating:chv */
6281         I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
6282                    GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
6283 }
6284
6285 static void g4x_init_clock_gating(struct drm_device *dev)
6286 {
6287         struct drm_i915_private *dev_priv = dev->dev_private;
6288         uint32_t dspclk_gate;
6289
6290         I915_WRITE(RENCLK_GATE_D1, 0);
6291         I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
6292                    GS_UNIT_CLOCK_GATE_DISABLE |
6293                    CL_UNIT_CLOCK_GATE_DISABLE);
6294         I915_WRITE(RAMCLK_GATE_D, 0);
6295         dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
6296                 OVRUNIT_CLOCK_GATE_DISABLE |
6297                 OVCUNIT_CLOCK_GATE_DISABLE;
6298         if (IS_GM45(dev))
6299                 dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
6300         I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
6301
6302         /* WaDisableRenderCachePipelinedFlush */
6303         I915_WRITE(CACHE_MODE_0,
6304                    _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
6305
6306         /* WaDisable_RenderCache_OperationalFlush:g4x */
6307         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6308
6309         g4x_disable_trickle_feed(dev);
6310 }
6311
6312 static void crestline_init_clock_gating(struct drm_device *dev)
6313 {
6314         struct drm_i915_private *dev_priv = dev->dev_private;
6315
6316         I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
6317         I915_WRITE(RENCLK_GATE_D2, 0);
6318         I915_WRITE(DSPCLK_GATE_D, 0);
6319         I915_WRITE(RAMCLK_GATE_D, 0);
6320         I915_WRITE16(DEUC, 0);
6321         I915_WRITE(MI_ARB_STATE,
6322                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6323
6324         /* WaDisable_RenderCache_OperationalFlush:gen4 */
6325         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6326 }
6327
6328 static void broadwater_init_clock_gating(struct drm_device *dev)
6329 {
6330         struct drm_i915_private *dev_priv = dev->dev_private;
6331
6332         I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
6333                    I965_RCC_CLOCK_GATE_DISABLE |
6334                    I965_RCPB_CLOCK_GATE_DISABLE |
6335                    I965_ISC_CLOCK_GATE_DISABLE |
6336                    I965_FBC_CLOCK_GATE_DISABLE);
6337         I915_WRITE(RENCLK_GATE_D2, 0);
6338         I915_WRITE(MI_ARB_STATE,
6339                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6340
6341         /* WaDisable_RenderCache_OperationalFlush:gen4 */
6342         I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
6343 }
6344
6345 static void gen3_init_clock_gating(struct drm_device *dev)
6346 {
6347         struct drm_i915_private *dev_priv = dev->dev_private;
6348         u32 dstate = I915_READ(D_STATE);
6349
6350         dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
6351                 DSTATE_DOT_CLOCK_GATING;
6352         I915_WRITE(D_STATE, dstate);
6353
6354         if (IS_PINEVIEW(dev))
6355                 I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
6356
6357         /* IIR "flip pending" means done if this bit is set */
6358         I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
6359
6360         /* interrupts should cause a wake up from C3 */
6361         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
6362
6363         /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
6364         I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
6365
6366         I915_WRITE(MI_ARB_STATE,
6367                    _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
6368 }
6369
6370 static void i85x_init_clock_gating(struct drm_device *dev)
6371 {
6372         struct drm_i915_private *dev_priv = dev->dev_private;
6373
6374         I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
6375
6376         /* interrupts should cause a wake up from C3 */
6377         I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
6378                    _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
6379
6380         I915_WRITE(MEM_MODE,
6381                    _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
6382 }
6383
6384 static void i830_init_clock_gating(struct drm_device *dev)
6385 {
6386         struct drm_i915_private *dev_priv = dev->dev_private;
6387
6388         I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
6389
6390         I915_WRITE(MEM_MODE,
6391                    _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
6392                    _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
6393 }
6394
6395 void intel_init_clock_gating(struct drm_device *dev)
6396 {
6397         struct drm_i915_private *dev_priv = dev->dev_private;
6398
6399         dev_priv->display.init_clock_gating(dev);
6400 }
6401
6402 void intel_suspend_hw(struct drm_device *dev)
6403 {
6404         if (HAS_PCH_LPT(dev))
6405                 lpt_suspend_hw(dev);
6406 }
6407
6408 /* Set up chip specific power management-related functions */
6409 void intel_init_pm(struct drm_device *dev)
6410 {
6411         struct drm_i915_private *dev_priv = dev->dev_private;
6412
6413         intel_fbc_init(dev_priv);
6414
6415         /* For cxsr */
6416         if (IS_PINEVIEW(dev))
6417                 i915_pineview_get_mem_freq(dev);
6418         else if (IS_GEN5(dev))
6419                 i915_ironlake_get_mem_freq(dev);
6420
6421         /* For FIFO watermark updates */
6422         if (INTEL_INFO(dev)->gen >= 9) {
6423                 skl_setup_wm_latency(dev);
6424
6425                 dev_priv->display.init_clock_gating = gen9_init_clock_gating;
6426                 dev_priv->display.update_wm = skl_update_wm;
6427                 dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
6428         } else if (HAS_PCH_SPLIT(dev)) {
6429                 ilk_setup_wm_latency(dev);
6430
6431                 if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
6432                      dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) ||
6433                     (!IS_GEN5(dev) && dev_priv->wm.pri_latency[0] &&
6434                      dev_priv->wm.spr_latency[0] && dev_priv->wm.cur_latency[0])) {
6435                         dev_priv->display.update_wm = ilk_update_wm;
6436                         dev_priv->display.update_sprite_wm = ilk_update_sprite_wm;
6437                 } else {
6438                         DRM_DEBUG_KMS("Failed to read display plane latency. "
6439                                       "Disabling CxSR\n");
6440                 }
6441
6442                 if (IS_GEN5(dev))
6443                         dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
6444                 else if (IS_GEN6(dev))
6445                         dev_priv->display.init_clock_gating = gen6_init_clock_gating;
6446                 else if (IS_IVYBRIDGE(dev))
6447                         dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
6448                 else if (IS_HASWELL(dev))
6449                         dev_priv->display.init_clock_gating = haswell_init_clock_gating;
6450                 else if (INTEL_INFO(dev)->gen == 8)
6451                         dev_priv->display.init_clock_gating = broadwell_init_clock_gating;
6452         } else if (IS_CHERRYVIEW(dev)) {
6453                 dev_priv->display.update_wm = cherryview_update_wm;
6454                 dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
6455                 dev_priv->display.init_clock_gating =
6456                         cherryview_init_clock_gating;
6457         } else if (IS_VALLEYVIEW(dev)) {
6458                 dev_priv->display.update_wm = valleyview_update_wm;
6459                 dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
6460                 dev_priv->display.init_clock_gating =
6461                         valleyview_init_clock_gating;
6462         } else if (IS_PINEVIEW(dev)) {
6463                 if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev),
6464                                             dev_priv->is_ddr3,
6465                                             dev_priv->fsb_freq,
6466                                             dev_priv->mem_freq)) {
6467                         DRM_INFO("failed to find known CxSR latency "
6468                                  "(found ddr%s fsb freq %d, mem freq %d), "
6469                                  "disabling CxSR\n",
6470                                  (dev_priv->is_ddr3 == 1) ? "3" : "2",
6471                                  dev_priv->fsb_freq, dev_priv->mem_freq);
6472                         /* Disable CxSR and never update its watermark again */
6473                         intel_set_memory_cxsr(dev_priv, false);
6474                         dev_priv->display.update_wm = NULL;
6475                 } else
6476                         dev_priv->display.update_wm = pineview_update_wm;
6477                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
6478         } else if (IS_G4X(dev)) {
6479                 dev_priv->display.update_wm = g4x_update_wm;
6480                 dev_priv->display.init_clock_gating = g4x_init_clock_gating;
6481         } else if (IS_GEN4(dev)) {
6482                 dev_priv->display.update_wm = i965_update_wm;
6483                 if (IS_CRESTLINE(dev))
6484                         dev_priv->display.init_clock_gating = crestline_init_clock_gating;
6485                 else if (IS_BROADWATER(dev))
6486                         dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
6487         } else if (IS_GEN3(dev)) {
6488                 dev_priv->display.update_wm = i9xx_update_wm;
6489                 dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
6490                 dev_priv->display.init_clock_gating = gen3_init_clock_gating;
6491         } else if (IS_GEN2(dev)) {
6492                 if (INTEL_INFO(dev)->num_pipes == 1) {
6493                         dev_priv->display.update_wm = i845_update_wm;
6494                         dev_priv->display.get_fifo_size = i845_get_fifo_size;
6495                 } else {
6496                         dev_priv->display.update_wm = i9xx_update_wm;
6497                         dev_priv->display.get_fifo_size = i830_get_fifo_size;
6498                 }
6499
6500                 if (IS_I85X(dev) || IS_I865G(dev))
6501                         dev_priv->display.init_clock_gating = i85x_init_clock_gating;
6502                 else
6503                         dev_priv->display.init_clock_gating = i830_init_clock_gating;
6504         } else {
6505                 DRM_ERROR("unexpected fall-through in intel_init_pm\n");
6506         }
6507 }
6508
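     /*
      * GEN6+ pcode mailbox handshake: write the request data, then the mailbox
      * command with GEN6_PCODE_READY set, and wait for the PCU to clear READY
      * (giving up after the 500 ms wait_for() timeout).  For reads the result
      * is then fetched from GEN6_PCODE_DATA.  Both helpers expect rps.hw_lock
      * to be held, as the WARN_ONs below check.
      */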
6509 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
6510 {
6511         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
6512
6513         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
6514                 DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n");
6515                 return -EAGAIN;
6516         }
6517
6518         I915_WRITE(GEN6_PCODE_DATA, *val);
6519         I915_WRITE(GEN6_PCODE_DATA1, 0);
6520         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
6521
6522         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
6523                      500)) {
6524                 DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
6525                 return -ETIMEDOUT;
6526         }
6527
6528         *val = I915_READ(GEN6_PCODE_DATA);
6529         I915_WRITE(GEN6_PCODE_DATA, 0);
6530
6531         return 0;
6532 }
6533
6534 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
6535 {
6536         WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
6537
6538         if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
6539                 DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n");
6540                 return -EAGAIN;
6541         }
6542
6543         I915_WRITE(GEN6_PCODE_DATA, val);
6544         I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
6545
6546         if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
6547                      500)) {
6548                 DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
6549                 return -ETIMEDOUT;
6550         }
6551
6552         I915_WRITE(GEN6_PCODE_DATA, 0);
6553
6554         return 0;
6555 }
6556
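     /*
      * VLV/CHV GPU frequency opcodes are converted to and from MHz using a
      * divider derived from the CZ clock below; -1 is returned for an unknown
      * clock, so callers must check the sign before dividing by the result.
      * For example, with czclk_freq == 320 (divider 16), byt_gpu_freq() maps
      * opcode 0xc8 to DIV_ROUND_CLOSEST(320 * (0xc8 + 6 - 0xbd), 16) = 340 MHz.
      */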
6557 static int vlv_gpu_freq_div(unsigned int czclk_freq)
6558 {
6559         switch (czclk_freq) {
6560         case 200:
6561                 return 10;
6562         case 267:
6563                 return 12;
6564         case 320:
6565         case 333:
6566                 return 16;
6567         case 400:
6568                 return 20;
6569         default:
6570                 return -1;
6571         }
6572 }
6573
6574 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
6575 {
6576         int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
6577
6578         div = vlv_gpu_freq_div(czclk_freq);
6579         if (div < 0)
6580                 return div;
6581
6582         return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
6583 }
6584
6585 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
6586 {
6587         int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
6588
6589         mul = vlv_gpu_freq_div(czclk_freq);
6590         if (mul < 0)
6591                 return mul;
6592
6593         return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
6594 }
6595
6596 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
6597 {
6598         int div, czclk_freq = dev_priv->rps.cz_freq;
6599
6600         div = vlv_gpu_freq_div(czclk_freq);
6601         if (div < 0)
6602                 return div;
6603
6604         return DIV_ROUND_CLOSEST(czclk_freq * val, div) / 2;
6605 }
6606
6607 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
6608 {
6609         int mul, czclk_freq = dev_priv->rps.cz_freq;
6610
6611         mul = vlv_gpu_freq_div(czclk_freq);
6612         if (mul < 0)
6613                 return mul;
6614
6615         /* CHV needs even values */
6616         return DIV_ROUND_CLOSEST(val * mul, czclk_freq) * 2;
6617 }
6618
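     /*
      * intel_gpu_freq()/intel_freq_opcode() convert between the hardware's
      * frequency encoding and MHz: CHV and VLV go through the CZ-clock based
      * helpers above, everything else uses a fixed GT_FREQUENCY_MULTIPLIER
      * MHz per encoding step.
      */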
6619 int intel_gpu_freq(struct drm_i915_private *dev_priv, int val)
6620 {
6621         if (IS_CHERRYVIEW(dev_priv->dev))
6622                 return chv_gpu_freq(dev_priv, val);
6623         else if (IS_VALLEYVIEW(dev_priv->dev))
6624                 return byt_gpu_freq(dev_priv, val);
6625         else
6626                 return val * GT_FREQUENCY_MULTIPLIER;
6627 }
6628
6629 int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
6630 {
6631         if (IS_CHERRYVIEW(dev_priv->dev))
6632                 return chv_freq_opcode(dev_priv, val);
6633         else if (IS_VALLEYVIEW(dev_priv->dev))
6634                 return byt_freq_opcode(dev_priv, val);
6635         else
6636                 return val / GT_FREQUENCY_MULTIPLIER;
6637 }
6638
6639 void intel_pm_setup(struct drm_device *dev)
6640 {
6641         struct drm_i915_private *dev_priv = dev->dev_private;
6642
6643         mutex_init(&dev_priv->rps.hw_lock);
6644
6645         INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
6646                           intel_gen6_powersave_work);
6647
6648         dev_priv->pm.suspended = false;
6649 }