Merge tag 'drm-intel-next-2014-12-19' of git://anongit.freedesktop.org/drm-intel...
[cascardo/linux.git] / drivers / gpu / drm / i915 / intel_pm.c
index ad2fd60..a3ebaa8 100644 (file)
@@ -30,9 +30,6 @@
 #include "intel_drv.h"
 #include "../../../platform/x86/intel_ips.h"
 #include <linux/module.h>
-#include <linux/vgaarb.h>
-#include <drm/i915_powerwell.h>
-#include <linux/pm_runtime.h>
 
 /**
  * RC6 is a special power stage which allows the GPU to enter an very
 #define INTEL_RC6p_ENABLE                      (1<<1)
 #define INTEL_RC6pp_ENABLE                     (1<<2)
 
-/* FBC, or Frame Buffer Compression, is a technique employed to compress the
- * framebuffer contents in-memory, aiming at reducing the required bandwidth
- * during in-memory transfers and, therefore, reduce the power packet.
- *
- * The benefits of FBC are mostly visible with solid backgrounds and
- * variation-less patterns.
- *
- * FBC-related functionality can be enabled by the means of the
- * i915.i915_enable_fbc parameter
- */
-
-static void i8xx_disable_fbc(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 fbc_ctl;
-
-       /* Disable compression */
-       fbc_ctl = I915_READ(FBC_CONTROL);
-       if ((fbc_ctl & FBC_CTL_EN) == 0)
-               return;
-
-       fbc_ctl &= ~FBC_CTL_EN;
-       I915_WRITE(FBC_CONTROL, fbc_ctl);
-
-       /* Wait for compressing bit to clear */
-       if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) {
-               DRM_DEBUG_KMS("FBC idle timed out\n");
-               return;
-       }
-
-       DRM_DEBUG_KMS("disabled FBC\n");
-}
-
-static void i8xx_enable_fbc(struct drm_crtc *crtc)
-{
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_framebuffer *fb = crtc->primary->fb;
-       struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       int cfb_pitch;
-       int i;
-       u32 fbc_ctl;
-
-       cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
-       if (fb->pitches[0] < cfb_pitch)
-               cfb_pitch = fb->pitches[0];
-
-       /* FBC_CTL wants 32B or 64B units */
-       if (IS_GEN2(dev))
-               cfb_pitch = (cfb_pitch / 32) - 1;
-       else
-               cfb_pitch = (cfb_pitch / 64) - 1;
-
-       /* Clear old tags */
-       for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
-               I915_WRITE(FBC_TAG + (i * 4), 0);
-
-       if (IS_GEN4(dev)) {
-               u32 fbc_ctl2;
-
-               /* Set it up... */
-               fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE;
-               fbc_ctl2 |= FBC_CTL_PLANE(intel_crtc->plane);
-               I915_WRITE(FBC_CONTROL2, fbc_ctl2);
-               I915_WRITE(FBC_FENCE_OFF, crtc->y);
-       }
-
-       /* enable it... */
-       fbc_ctl = I915_READ(FBC_CONTROL);
-       fbc_ctl &= 0x3fff << FBC_CTL_INTERVAL_SHIFT;
-       fbc_ctl |= FBC_CTL_EN | FBC_CTL_PERIODIC;
-       if (IS_I945GM(dev))
-               fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
-       fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
-       fbc_ctl |= obj->fence_reg;
-       I915_WRITE(FBC_CONTROL, fbc_ctl);
-
-       DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n",
-                     cfb_pitch, crtc->y, plane_name(intel_crtc->plane));
-}
-
-static bool i8xx_fbc_enabled(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
-}
-
-static void g4x_enable_fbc(struct drm_crtc *crtc)
-{
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_framebuffer *fb = crtc->primary->fb;
-       struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       u32 dpfc_ctl;
-
-       dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane) | DPFC_SR_EN;
-       if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
-               dpfc_ctl |= DPFC_CTL_LIMIT_2X;
-       else
-               dpfc_ctl |= DPFC_CTL_LIMIT_1X;
-       dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg;
-
-       I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
-
-       /* enable it... */
-       I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
-
-       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
-}
-
-static void g4x_disable_fbc(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 dpfc_ctl;
-
-       /* Disable compression */
-       dpfc_ctl = I915_READ(DPFC_CONTROL);
-       if (dpfc_ctl & DPFC_CTL_EN) {
-               dpfc_ctl &= ~DPFC_CTL_EN;
-               I915_WRITE(DPFC_CONTROL, dpfc_ctl);
-
-               DRM_DEBUG_KMS("disabled FBC\n");
-       }
-}
-
-static bool g4x_fbc_enabled(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
-}
-
-static void sandybridge_blit_fbc_update(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 blt_ecoskpd;
-
-       /* Make sure blitter notifies FBC of writes */
-
-       /* Blitter is part of Media powerwell on VLV. No impact of
-        * his param in other platforms for now */
-       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA);
-
-       blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
-       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
-               GEN6_BLITTER_LOCK_SHIFT;
-       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
-       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
-       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
-       blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
-                        GEN6_BLITTER_LOCK_SHIFT);
-       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
-       POSTING_READ(GEN6_BLITTER_ECOSKPD);
-
-       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA);
-}
-
-static void ironlake_enable_fbc(struct drm_crtc *crtc)
-{
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_framebuffer *fb = crtc->primary->fb;
-       struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       u32 dpfc_ctl;
-
-       dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane);
-       if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
-               dev_priv->fbc.threshold++;
-
-       switch (dev_priv->fbc.threshold) {
-       case 4:
-       case 3:
-               dpfc_ctl |= DPFC_CTL_LIMIT_4X;
-               break;
-       case 2:
-               dpfc_ctl |= DPFC_CTL_LIMIT_2X;
-               break;
-       case 1:
-               dpfc_ctl |= DPFC_CTL_LIMIT_1X;
-               break;
-       }
-       dpfc_ctl |= DPFC_CTL_FENCE_EN;
-       if (IS_GEN5(dev))
-               dpfc_ctl |= obj->fence_reg;
-
-       I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
-       I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
-       /* enable it... */
-       I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
-
-       if (IS_GEN6(dev)) {
-               I915_WRITE(SNB_DPFC_CTL_SA,
-                          SNB_CPU_FENCE_ENABLE | obj->fence_reg);
-               I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
-               sandybridge_blit_fbc_update(dev);
-       }
-
-       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
-}
-
-static void ironlake_disable_fbc(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 dpfc_ctl;
-
-       /* Disable compression */
-       dpfc_ctl = I915_READ(ILK_DPFC_CONTROL);
-       if (dpfc_ctl & DPFC_CTL_EN) {
-               dpfc_ctl &= ~DPFC_CTL_EN;
-               I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl);
-
-               DRM_DEBUG_KMS("disabled FBC\n");
-       }
-}
-
-static bool ironlake_fbc_enabled(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN;
-}
-
-static void gen7_enable_fbc(struct drm_crtc *crtc)
-{
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_framebuffer *fb = crtc->primary->fb;
-       struct drm_i915_gem_object *obj = intel_fb_obj(fb);
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       u32 dpfc_ctl;
-
-       dpfc_ctl = IVB_DPFC_CTL_PLANE(intel_crtc->plane);
-       if (drm_format_plane_cpp(fb->pixel_format, 0) == 2)
-               dev_priv->fbc.threshold++;
-
-       switch (dev_priv->fbc.threshold) {
-       case 4:
-       case 3:
-               dpfc_ctl |= DPFC_CTL_LIMIT_4X;
-               break;
-       case 2:
-               dpfc_ctl |= DPFC_CTL_LIMIT_2X;
-               break;
-       case 1:
-               dpfc_ctl |= DPFC_CTL_LIMIT_1X;
-               break;
-       }
-
-       dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
-
-       if (dev_priv->fbc.false_color)
-               dpfc_ctl |= FBC_CTL_FALSE_COLOR;
-
-       I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
-
-       if (IS_IVYBRIDGE(dev)) {
-               /* WaFbcAsynchFlipDisableFbcQueue:ivb */
-               I915_WRITE(ILK_DISPLAY_CHICKEN1,
-                          I915_READ(ILK_DISPLAY_CHICKEN1) |
-                          ILK_FBCQ_DIS);
-       } else {
-               /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
-               I915_WRITE(CHICKEN_PIPESL_1(intel_crtc->pipe),
-                          I915_READ(CHICKEN_PIPESL_1(intel_crtc->pipe)) |
-                          HSW_FBCQ_DIS);
-       }
-
-       I915_WRITE(SNB_DPFC_CTL_SA,
-                  SNB_CPU_FENCE_ENABLE | obj->fence_reg);
-       I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
-
-       sandybridge_blit_fbc_update(dev);
-
-       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
-}
-
-bool intel_fbc_enabled(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       if (!dev_priv->display.fbc_enabled)
-               return false;
-
-       return dev_priv->display.fbc_enabled(dev);
-}
-
-void gen8_fbc_sw_flush(struct drm_device *dev, u32 value)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       if (!IS_GEN8(dev))
-               return;
-
-       I915_WRITE(MSG_FBC_REND_STATE, value);
-}
-
-static void intel_fbc_work_fn(struct work_struct *__work)
-{
-       struct intel_fbc_work *work =
-               container_of(to_delayed_work(__work),
-                            struct intel_fbc_work, work);
-       struct drm_device *dev = work->crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       mutex_lock(&dev->struct_mutex);
-       if (work == dev_priv->fbc.fbc_work) {
-               /* Double check that we haven't switched fb without cancelling
-                * the prior work.
-                */
-               if (work->crtc->primary->fb == work->fb) {
-                       dev_priv->display.enable_fbc(work->crtc);
-
-                       dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane;
-                       dev_priv->fbc.fb_id = work->crtc->primary->fb->base.id;
-                       dev_priv->fbc.y = work->crtc->y;
-               }
-
-               dev_priv->fbc.fbc_work = NULL;
-       }
-       mutex_unlock(&dev->struct_mutex);
-
-       kfree(work);
-}
-
-static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
-{
-       if (dev_priv->fbc.fbc_work == NULL)
-               return;
-
-       DRM_DEBUG_KMS("cancelling pending FBC enable\n");
-
-       /* Synchronisation is provided by struct_mutex and checking of
-        * dev_priv->fbc.fbc_work, so we can perform the cancellation
-        * entirely asynchronously.
-        */
-       if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
-               /* tasklet was killed before being run, clean up */
-               kfree(dev_priv->fbc.fbc_work);
-
-       /* Mark the work as no longer wanted so that if it does
-        * wake-up (because the work was already running and waiting
-        * for our mutex), it will discover that is no longer
-        * necessary to run.
-        */
-       dev_priv->fbc.fbc_work = NULL;
-}
-
-static void intel_enable_fbc(struct drm_crtc *crtc)
-{
-       struct intel_fbc_work *work;
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       if (!dev_priv->display.enable_fbc)
-               return;
-
-       intel_cancel_fbc_work(dev_priv);
-
-       work = kzalloc(sizeof(*work), GFP_KERNEL);
-       if (work == NULL) {
-               DRM_ERROR("Failed to allocate FBC work structure\n");
-               dev_priv->display.enable_fbc(crtc);
-               return;
-       }
-
-       work->crtc = crtc;
-       work->fb = crtc->primary->fb;
-       INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
-
-       dev_priv->fbc.fbc_work = work;
-
-       /* Delay the actual enabling to let pageflipping cease and the
-        * display to settle before starting the compression. Note that
-        * this delay also serves a second purpose: it allows for a
-        * vblank to pass after disabling the FBC before we attempt
-        * to modify the control registers.
-        *
-        * A more complicated solution would involve tracking vblanks
-        * following the termination of the page-flipping sequence
-        * and indeed performing the enable as a co-routine and not
-        * waiting synchronously upon the vblank.
-        *
-        * WaFbcWaitForVBlankBeforeEnable:ilk,snb
-        */
-       schedule_delayed_work(&work->work, msecs_to_jiffies(50));
-}
-
-void intel_disable_fbc(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       intel_cancel_fbc_work(dev_priv);
-
-       if (!dev_priv->display.disable_fbc)
-               return;
-
-       dev_priv->display.disable_fbc(dev);
-       dev_priv->fbc.plane = -1;
-}
-
-static bool set_no_fbc_reason(struct drm_i915_private *dev_priv,
-                             enum no_fbc_reason reason)
-{
-       if (dev_priv->fbc.no_fbc_reason == reason)
-               return false;
-
-       dev_priv->fbc.no_fbc_reason = reason;
-       return true;
-}
-
-/**
- * intel_update_fbc - enable/disable FBC as needed
- * @dev: the drm_device
- *
- * Set up the framebuffer compression hardware at mode set time.  We
- * enable it if possible:
- *   - plane A only (on pre-965)
- *   - no pixel mulitply/line duplication
- *   - no alpha buffer discard
- *   - no dual wide
- *   - framebuffer <= max_hdisplay in width, max_vdisplay in height
- *
- * We can't assume that any compression will take place (worst case),
- * so the compressed buffer has to be the same size as the uncompressed
- * one.  It also must reside (along with the line length buffer) in
- * stolen memory.
- *
- * We need to enable/disable FBC on a global basis.
- */
-void intel_update_fbc(struct drm_device *dev)
+static void gen9_init_clock_gating(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_crtc *crtc = NULL, *tmp_crtc;
-       struct intel_crtc *intel_crtc;
-       struct drm_framebuffer *fb;
-       struct drm_i915_gem_object *obj;
-       const struct drm_display_mode *adjusted_mode;
-       unsigned int max_width, max_height;
-
-       if (!HAS_FBC(dev)) {
-               set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED);
-               return;
-       }
-
-       if (!i915.powersave) {
-               if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
-                       DRM_DEBUG_KMS("fbc disabled per module param\n");
-               return;
-       }
 
        /*
-        * If FBC is already on, we just have to verify that we can
-        * keep it that way...
-        * Need to disable if:
-        *   - more than one pipe is active
-        *   - changing FBC params (stride, fence, mode)
-        *   - new fb is too large to fit in compressed buffer
-        *   - going to an unsupported config (interlace, pixel multiply, etc.)
+        * WaDisableSDEUnitClockGating:skl
+        * This seems to be a pre-production w/a.
         */
-       for_each_crtc(dev, tmp_crtc) {
-               if (intel_crtc_active(tmp_crtc) &&
-                   to_intel_crtc(tmp_crtc)->primary_enabled) {
-                       if (crtc) {
-                               if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES))
-                                       DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
-                               goto out_disable;
-                       }
-                       crtc = tmp_crtc;
-               }
-       }
-
-       if (!crtc || crtc->primary->fb == NULL) {
-               if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT))
-                       DRM_DEBUG_KMS("no output, disabling\n");
-               goto out_disable;
-       }
-
-       intel_crtc = to_intel_crtc(crtc);
-       fb = crtc->primary->fb;
-       obj = intel_fb_obj(fb);
-       adjusted_mode = &intel_crtc->config.adjusted_mode;
-
-       if (i915.enable_fbc < 0) {
-               if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
-                       DRM_DEBUG_KMS("disabled per chip default\n");
-               goto out_disable;
-       }
-       if (!i915.enable_fbc) {
-               if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
-                       DRM_DEBUG_KMS("fbc disabled per module param\n");
-               goto out_disable;
-       }
-       if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) ||
-           (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
-               if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
-                       DRM_DEBUG_KMS("mode incompatible with compression, "
-                                     "disabling\n");
-               goto out_disable;
-       }
-
-       if (INTEL_INFO(dev)->gen >= 8 || IS_HASWELL(dev)) {
-               max_width = 4096;
-               max_height = 4096;
-       } else if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) {
-               max_width = 4096;
-               max_height = 2048;
-       } else {
-               max_width = 2048;
-               max_height = 1536;
-       }
-       if (intel_crtc->config.pipe_src_w > max_width ||
-           intel_crtc->config.pipe_src_h > max_height) {
-               if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE))
-                       DRM_DEBUG_KMS("mode too large for compression, disabling\n");
-               goto out_disable;
-       }
-       if ((INTEL_INFO(dev)->gen < 4 || HAS_DDI(dev)) &&
-           intel_crtc->plane != PLANE_A) {
-               if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
-                       DRM_DEBUG_KMS("plane not A, disabling compression\n");
-               goto out_disable;
-       }
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
-       /* The use of a CPU fence is mandatory in order to detect writes
-        * by the CPU to the scanout and trigger updates to the FBC.
-        */
-       if (obj->tiling_mode != I915_TILING_X ||
-           obj->fence_reg == I915_FENCE_REG_NONE) {
-               if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED))
-                       DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
-               goto out_disable;
-       }
-       if (INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev) &&
-           to_intel_plane(crtc->primary)->rotation != BIT(DRM_ROTATE_0)) {
-               if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
-                       DRM_DEBUG_KMS("Rotation unsupported, disabling\n");
-               goto out_disable;
-       }
-
-       /* If the kernel debugger is active, always disable compression */
-       if (in_dbg_master())
-               goto out_disable;
-
-       if (i915_gem_stolen_setup_compression(dev, obj->base.size,
-                                             drm_format_plane_cpp(fb->pixel_format, 0))) {
-               if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL))
-                       DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
-               goto out_disable;
-       }
-
-       /* If the scanout has not changed, don't modify the FBC settings.
-        * Note that we make the fundamental assumption that the fb->obj
-        * cannot be unpinned (and have its GTT offset and fence revoked)
-        * without first being decoupled from the scanout and FBC disabled.
+       /*
+        * WaDisableDgMirrorFixInHalfSliceChicken5:skl
+        * This is a pre-production w/a.
         */
-       if (dev_priv->fbc.plane == intel_crtc->plane &&
-           dev_priv->fbc.fb_id == fb->base.id &&
-           dev_priv->fbc.y == crtc->y)
-               return;
-
-       if (intel_fbc_enabled(dev)) {
-               /* We update FBC along two paths, after changing fb/crtc
-                * configuration (modeswitching) and after page-flipping
-                * finishes. For the latter, we know that not only did
-                * we disable the FBC at the start of the page-flip
-                * sequence, but also more than one vblank has passed.
-                *
-                * For the former case of modeswitching, it is possible
-                * to switch between two FBC valid configurations
-                * instantaneously so we do need to disable the FBC
-                * before we can modify its control registers. We also
-                * have to wait for the next vblank for that to take
-                * effect. However, since we delay enabling FBC we can
-                * assume that a vblank has passed since disabling and
-                * that we can safely alter the registers in the deferred
-                * callback.
-                *
-                * In the scenario that we go from a valid to invalid
-                * and then back to valid FBC configuration we have
-                * no strict enforcement that a vblank occurred since
-                * disabling the FBC. However, along all current pipe
-                * disabling paths we do need to wait for a vblank at
-                * some point. And we wait before enabling FBC anyway.
-                */
-               DRM_DEBUG_KMS("disabling active FBC for update\n");
-               intel_disable_fbc(dev);
-       }
-
-       intel_enable_fbc(crtc);
-       dev_priv->fbc.no_fbc_reason = FBC_OK;
-       return;
+       I915_WRITE(GEN9_HALF_SLICE_CHICKEN5,
+                  I915_READ(GEN9_HALF_SLICE_CHICKEN5) &
+                  ~GEN9_DG_MIRROR_FIX_ENABLE);
 
-out_disable:
-       /* Multiple disables should be harmless */
-       if (intel_fbc_enabled(dev)) {
-               DRM_DEBUG_KMS("unsupported config, disabling FBC\n");
-               intel_disable_fbc(dev);
-       }
-       i915_gem_stolen_cleanup_compression(dev);
+       /* Wa4x4STCOptimizationDisable:skl */
+       I915_WRITE(CACHE_MODE_1,
+                  _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
 }
 
+
 static void i915_pineview_get_mem_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1310,6 +727,7 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
                                      int *prec_mult,
                                      int *drain_latency)
 {
+       struct drm_device *dev = crtc->dev;
        int entries;
        int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
 
@@ -1320,8 +738,12 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
                return false;
 
        entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
-       *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
-                                      DRAIN_LATENCY_PRECISION_32;
+       if (IS_CHERRYVIEW(dev))
+               *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 :
+                                              DRAIN_LATENCY_PRECISION_16;
+       else
+               *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
+                                              DRAIN_LATENCY_PRECISION_32;
        *drain_latency = (64 * (*prec_mult) * 4) / entries;
 
        if (*drain_latency > DRAIN_LATENCY_MASK)
@@ -1340,15 +762,18 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
 
 static void vlv_update_drain_latency(struct drm_crtc *crtc)
 {
-       struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pixel_size;
        int drain_latency;
        enum pipe pipe = intel_crtc->pipe;
        int plane_prec, prec_mult, plane_dl;
+       const int high_precision = IS_CHERRYVIEW(dev) ?
+               DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;
 
-       plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 |
-                  DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 |
+       plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH |
+                  DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH |
                   (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT));
 
        if (!intel_crtc_active(crtc)) {
@@ -1359,9 +784,9 @@ static void vlv_update_drain_latency(struct drm_crtc *crtc)
        /* Primary plane Drain Latency */
        pixel_size = crtc->primary->fb->bits_per_pixel / 8;     /* BPP */
        if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
-               plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
-                                          DDL_PLANE_PRECISION_64 :
-                                          DDL_PLANE_PRECISION_32;
+               plane_prec = (prec_mult == high_precision) ?
+                                          DDL_PLANE_PRECISION_HIGH :
+                                          DDL_PLANE_PRECISION_LOW;
                plane_dl |= plane_prec | drain_latency;
        }
 
@@ -1373,9 +798,9 @@ static void vlv_update_drain_latency(struct drm_crtc *crtc)
        /* Program cursor DL only if it is enabled */
        if (intel_crtc->cursor_base &&
            vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
-               plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
-                                          DDL_CURSOR_PRECISION_64 :
-                                          DDL_CURSOR_PRECISION_32;
+               plane_prec = (prec_mult == high_precision) ?
+                                          DDL_CURSOR_PRECISION_HIGH :
+                                          DDL_CURSOR_PRECISION_LOW;
                plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT);
        }
 
@@ -1543,15 +968,17 @@ static void valleyview_update_sprite_wm(struct drm_plane *plane,
        int plane_prec;
        int sprite_dl;
        int prec_mult;
+       const int high_precision = IS_CHERRYVIEW(dev) ?
+               DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64;
 
-       sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) |
+       sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) |
                    (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite)));
 
        if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult,
                                                 &drain_latency)) {
-               plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
-                                          DDL_SPRITE_PRECISION_64(sprite) :
-                                          DDL_SPRITE_PRECISION_32(sprite);
+               plane_prec = (prec_mult == high_precision) ?
+                                          DDL_SPRITE_PRECISION_HIGH(sprite) :
+                                          DDL_SPRITE_PRECISION_LOW(sprite);
                sprite_dl |= plane_prec |
                             (drain_latency << DDL_SPRITE_SHIFT(sprite));
        }
@@ -1915,6 +1342,14 @@ static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
        return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
 }
 
+struct skl_pipe_wm_parameters {
+       bool active;
+       uint32_t pipe_htotal;
+       uint32_t pixel_rate; /* in KHz */
+       struct intel_plane_wm_parameters plane[I915_MAX_PLANES];
+       struct intel_plane_wm_parameters cursor;
+};
+
 struct ilk_pipe_wm_parameters {
        bool active;
        uint32_t pipe_htotal;
@@ -2226,21 +1661,92 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
               PIPE_WM_LINETIME_TIME(linetime);
 }
 
-static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5])
+static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8])
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
-               uint64_t sskpd = I915_READ64(MCH_SSKPD);
+       if (IS_GEN9(dev)) {
+               uint32_t val;
+               int ret, i;
+               int level, max_level = ilk_wm_max_level(dev);
 
-               wm[0] = (sskpd >> 56) & 0xFF;
-               if (wm[0] == 0)
-                       wm[0] = sskpd & 0xF;
-               wm[1] = (sskpd >> 4) & 0xFF;
-               wm[2] = (sskpd >> 12) & 0xFF;
-               wm[3] = (sskpd >> 20) & 0x1FF;
-               wm[4] = (sskpd >> 32) & 0x1FF;
-       } else if (INTEL_INFO(dev)->gen >= 6) {
+               /* read the first set of memory latencies[0:3] */
+               val = 0; /* data0 to be programmed to 0 for first set */
+               mutex_lock(&dev_priv->rps.hw_lock);
+               ret = sandybridge_pcode_read(dev_priv,
+                                            GEN9_PCODE_READ_MEM_LATENCY,
+                                            &val);
+               mutex_unlock(&dev_priv->rps.hw_lock);
+
+               if (ret) {
+                       DRM_ERROR("SKL Mailbox read error = %d\n", ret);
+                       return;
+               }
+
+               wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
+               wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
+                               GEN9_MEM_LATENCY_LEVEL_MASK;
+               wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
+                               GEN9_MEM_LATENCY_LEVEL_MASK;
+               wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
+                               GEN9_MEM_LATENCY_LEVEL_MASK;
+
+               /* read the second set of memory latencies[4:7] */
+               val = 1; /* data0 to be programmed to 1 for second set */
+               mutex_lock(&dev_priv->rps.hw_lock);
+               ret = sandybridge_pcode_read(dev_priv,
+                                            GEN9_PCODE_READ_MEM_LATENCY,
+                                            &val);
+               mutex_unlock(&dev_priv->rps.hw_lock);
+               if (ret) {
+                       DRM_ERROR("SKL Mailbox read error = %d\n", ret);
+                       return;
+               }
+
+               wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK;
+               wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) &
+                               GEN9_MEM_LATENCY_LEVEL_MASK;
+               wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) &
+                               GEN9_MEM_LATENCY_LEVEL_MASK;
+               wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) &
+                               GEN9_MEM_LATENCY_LEVEL_MASK;
+
+               /*
+                * punit doesn't take into account the read latency so we need
+                * to add 2us to the various latency levels we retrieve from
+                * the punit.
+                *   - W0 is a bit special in that it's the only level that
+                *   can't be disabled if we want to have display working, so
+                *   we always add 2us there.
+                *   - For levels >=1, punit returns 0us latency when they are
+                *   disabled, so we respect that and don't add 2us then
+                *
+                * Additionally, if a level n (n > 1) has a 0us latency, all
+                * levels m (m >= n) need to be disabled. We make sure to
+                * sanitize the values out of the punit to satisfy this
+                * requirement.
+                */
+               wm[0] += 2;
+               for (level = 1; level <= max_level; level++)
+                       if (wm[level] != 0)
+                               wm[level] += 2;
+                       else {
+                               for (i = level + 1; i <= max_level; i++)
+                                       wm[i] = 0;
+
+                               break;
+                       }
+       } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+               uint64_t sskpd = I915_READ64(MCH_SSKPD);
+
+               wm[0] = (sskpd >> 56) & 0xFF;
+               if (wm[0] == 0)
+                       wm[0] = sskpd & 0xF;
+               wm[1] = (sskpd >> 4) & 0xFF;
+               wm[2] = (sskpd >> 12) & 0xFF;
+               wm[3] = (sskpd >> 20) & 0x1FF;
+               wm[4] = (sskpd >> 32) & 0x1FF;
+       } else if (INTEL_INFO(dev)->gen >= 6) {
                uint32_t sskpd = I915_READ(MCH_SSKPD);
 
                wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
@@ -2278,7 +1784,9 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
 int ilk_wm_max_level(const struct drm_device *dev)
 {
        /* how many WM levels are we expecting */
-       if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+       if (IS_GEN9(dev))
+               return 7;
+       else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
                return 4;
        else if (INTEL_INFO(dev)->gen >= 6)
                return 3;
@@ -2288,7 +1796,7 @@ int ilk_wm_max_level(const struct drm_device *dev)
 
 static void intel_print_wm_latency(struct drm_device *dev,
                                   const char *name,
-                                  const uint16_t wm[5])
+                                  const uint16_t wm[8])
 {
        int level, max_level = ilk_wm_max_level(dev);
 
@@ -2301,8 +1809,13 @@ static void intel_print_wm_latency(struct drm_device *dev,
                        continue;
                }
 
-               /* WM1+ latency values in 0.5us units */
-               if (level > 0)
+               /*
+                * - latencies are in us on gen9.
+                * - before then, WM1+ latency values are in 0.5us units
+                */
+               if (IS_GEN9(dev))
+                       latency *= 10;
+               else if (level > 0)
                        latency *= 5;
 
                DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
@@ -2370,6 +1883,14 @@ static void ilk_setup_wm_latency(struct drm_device *dev)
                snb_wm_latency_quirk(dev);
 }
 
+/* Read the gen9 (SKL) memory latency values into dev_priv and log them. */
+static void skl_setup_wm_latency(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /* Latencies are cached once here; the print call just dumps them. */
+       intel_read_wm_latency(dev, dev_priv->wm.skl_latency);
+       intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency);
+}
+
 static void ilk_compute_wm_parameters(struct drm_crtc *crtc,
                                      struct ilk_pipe_wm_parameters *p)
 {
@@ -2860,4337 +2381,3938 @@ static bool ilk_disable_lp_wm(struct drm_device *dev)
        return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
 }
 
-static void ilk_update_wm(struct drm_crtc *crtc)
-{
-       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct ilk_wm_maximums max;
-       struct ilk_pipe_wm_parameters params = {};
-       struct ilk_wm_values results = {};
-       enum intel_ddb_partitioning partitioning;
-       struct intel_pipe_wm pipe_wm = {};
-       struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
-       struct intel_wm_config config = {};
+/*
+ * On gen9, we need to allocate Display Data Buffer (DDB) portions to the
+ * different active planes.
+ */
 
-       ilk_compute_wm_parameters(crtc, &params);
+#define SKL_DDB_SIZE           896     /* in blocks */
 
-       intel_compute_pipe_wm(crtc, &params, &pipe_wm);
+static void
+skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
+                                  struct drm_crtc *for_crtc,
+                                  const struct intel_wm_config *config,
+                                  const struct skl_pipe_wm_parameters *params,
+                                  struct skl_ddb_entry *alloc /* out */)
+{
+       struct drm_crtc *crtc;
+       unsigned int pipe_size, ddb_size;
+       int nth_active_pipe;
 
-       if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
+       if (!params->active) {
+               alloc->start = 0;
+               alloc->end = 0;
                return;
+       }
 
-       intel_crtc->wm.active = pipe_wm;
+       ddb_size = SKL_DDB_SIZE;
 
-       ilk_compute_wm_config(dev, &config);
+       ddb_size -= 4; /* 4 blocks for bypass path allocation */
 
-       ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
-       ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
+       nth_active_pipe = 0;
+       for_each_crtc(dev, crtc) {
+               if (!intel_crtc_active(crtc))
+                       continue;
 
-       /* 5/6 split only in single pipe config on IVB+ */
-       if (INTEL_INFO(dev)->gen >= 7 &&
-           config.num_pipes_active == 1 && config.sprites_enabled) {
-               ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
-               ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
+               if (crtc == for_crtc)
+                       break;
 
-               best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
-       } else {
-               best_lp_wm = &lp_wm_1_2;
+               nth_active_pipe++;
        }
 
-       partitioning = (best_lp_wm == &lp_wm_1_2) ?
-                      INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
+       pipe_size = ddb_size / config->num_pipes_active;
+       alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active;
+       alloc->end = alloc->start + pipe_size;
+}
 
-       ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
+static unsigned int skl_cursor_allocation(const struct intel_wm_config *config)
+{
+       if (config->num_pipes_active == 1)
+               return 32;
 
-       ilk_write_wm_values(dev_priv, &results);
+       return 8;
 }
 
-static void
-ilk_update_sprite_wm(struct drm_plane *plane,
-                    struct drm_crtc *crtc,
-                    uint32_t sprite_width, uint32_t sprite_height,
-                    int pixel_size, bool enabled, bool scaled)
+static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
 {
-       struct drm_device *dev = plane->dev;
-       struct intel_plane *intel_plane = to_intel_plane(plane);
+       entry->start = reg & 0x3ff;
+       entry->end = (reg >> 16) & 0x3ff;
+       if (entry->end)
+               entry->end += 1;
+}
 
-       intel_plane->wm.enabled = enabled;
-       intel_plane->wm.scaled = scaled;
-       intel_plane->wm.horiz_pixels = sprite_width;
-       intel_plane->wm.vert_pixels = sprite_width;
-       intel_plane->wm.bytes_per_pixel = pixel_size;
+void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
+                         struct skl_ddb_allocation *ddb /* out */)
+{
+       struct drm_device *dev = dev_priv->dev;
+       enum pipe pipe;
+       int plane;
+       u32 val;
 
-       /*
-        * IVB workaround: must disable low power watermarks for at least
-        * one frame before enabling scaling.  LP watermarks can be re-enabled
-        * when scaling is disabled.
-        *
-        * WaCxSRDisabledForSpriteScaling:ivb
-        */
-       if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
-               intel_wait_for_vblank(dev, intel_plane->pipe);
+       for_each_pipe(dev_priv, pipe) {
+               for_each_plane(pipe, plane) {
+                       val = I915_READ(PLANE_BUF_CFG(pipe, plane));
+                       skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
+                                                  val);
+               }
 
-       ilk_update_wm(crtc);
+               val = I915_READ(CUR_BUF_CFG(pipe));
+               skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val);
+       }
 }
 
-static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
+/*
+ * Relative data rate of a plane: pixel count times bytes per pixel.
+ * Only the ratio between planes matters (it is later divided by the
+ * total rate when apportioning DDB blocks), hence "relative".
+ */
+static unsigned int
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+{
+       return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
+}
+
+/*
+ * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching
+ * an 8192x4096@32bpp framebuffer:
+ *   3 * 4096 * 8192 * 4 < 2^32
+ */
+/* Sum of the relative data rates of all enabled planes on this CRTC. */
+static unsigned int
+skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
+                                const struct skl_pipe_wm_parameters *params)
+{
+       unsigned int total_data_rate = 0;
+       int plane;
+
+       for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
+               const struct intel_plane_wm_parameters *p;
+
+               /* Disabled planes fetch no data and get no DDB share. */
+               p = &params->plane[plane];
+               if (!p->enabled)
+                       continue;
+
+               total_data_rate += skl_plane_relative_data_rate(p);
+       }
+
+       return total_data_rate;
+}
+
+static void
+skl_allocate_pipe_ddb(struct drm_crtc *crtc,
+                     const struct intel_wm_config *config,
+                     const struct skl_pipe_wm_parameters *params,
+                     struct skl_ddb_allocation *ddb /* out */)
 {
        struct drm_device *dev = crtc->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct ilk_wm_values *hw = &dev_priv->wm.hw;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-       struct intel_pipe_wm *active = &intel_crtc->wm.active;
        enum pipe pipe = intel_crtc->pipe;
-       static const unsigned int wm0_pipe_reg[] = {
-               [PIPE_A] = WM0_PIPEA_ILK,
-               [PIPE_B] = WM0_PIPEB_ILK,
-               [PIPE_C] = WM0_PIPEC_IVB,
-       };
+       struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
+       uint16_t alloc_size, start, cursor_blocks;
+       unsigned int total_data_rate;
+       int plane;
+
+       skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc);
+       alloc_size = skl_ddb_entry_size(alloc);
+       if (alloc_size == 0) {
+               memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
+               memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe]));
+               return;
+       }
 
-       hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
-       if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-               hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
+       cursor_blocks = skl_cursor_allocation(config);
+       ddb->cursor[pipe].start = alloc->end - cursor_blocks;
+       ddb->cursor[pipe].end = alloc->end;
 
-       active->pipe_enabled = intel_crtc_active(crtc);
+       alloc_size -= cursor_blocks;
+       alloc->end -= cursor_blocks;
 
-       if (active->pipe_enabled) {
-               u32 tmp = hw->wm_pipe[pipe];
+       /*
+        * Each active plane gets a portion of the remaining space, in
+        * proportion to the amount of data it needs to fetch from memory.
+        *
+        * FIXME: we may not allocate every single block here.
+        */
+       total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params);
 
-               /*
-                * For active pipes LP0 watermark is marked as
-                * enabled, and LP1+ watermaks as disabled since
-                * we can't really reverse compute them in case
-                * multiple pipes are active.
-                */
-               active->wm[0].enable = true;
-               active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
-               active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
-               active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
-               active->linetime = hw->wm_linetime[pipe];
-       } else {
-               int level, max_level = ilk_wm_max_level(dev);
+       start = alloc->start;
+       for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
+               const struct intel_plane_wm_parameters *p;
+               unsigned int data_rate;
+               uint16_t plane_blocks;
+
+               p = &params->plane[plane];
+               if (!p->enabled)
+                       continue;
+
+               data_rate = skl_plane_relative_data_rate(p);
 
                /*
-                * For inactive pipes, all watermark levels
-                * should be marked as enabled but zeroed,
-                * which is what we'd compute them to.
+                * Promote the expression to 64 bits to avoid overflowing; the
+                * result fits in the allocation since
+                * data_rate / total_data_rate < 1.
-               for (level = 0; level <= max_level; level++)
-                       active->wm[level].enable = true;
+               plane_blocks = div_u64((uint64_t)alloc_size * data_rate,
+                                      total_data_rate);
+
+               ddb->plane[pipe][plane].start = start;
+               ddb->plane[pipe][plane].end = start + plane_blocks;
+
+               start += plane_blocks;
        }
+
 }
 
-void ilk_wm_get_hw_state(struct drm_device *dev)
+static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct ilk_wm_values *hw = &dev_priv->wm.hw;
-       struct drm_crtc *crtc;
-
-       for_each_crtc(dev, crtc)
-               ilk_pipe_wm_get_hw_state(crtc);
+       /* TODO: Take into account the scalers once we support them */
+       return config->adjusted_mode.crtc_clock;
+}
 
-       hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
-       hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
-       hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
+/*
+ * The max latency should be 257 (max the punit can code is 255 and we add 2us
+ * for the read latency) and bytes_per_pixel should always be <= 8, so that
+ * should allow a pixel_rate of up to ~2 GHz, which seems sufficient since the
+ * max 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
+ */
+static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
+                              uint32_t latency)
+{
+       uint32_t wm_intermediate_val, ret;
 
-       hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
-       if (INTEL_INFO(dev)->gen >= 7) {
-               hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
-               hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
-       }
+       if (latency == 0)
+               return UINT_MAX;
 
-       if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-               hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
-                       INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
-       else if (IS_IVYBRIDGE(dev))
-               hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
-                       INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
+       wm_intermediate_val = latency * pixel_rate * bytes_per_pixel;
+       ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
 
-       hw->enable_fbc_wm =
-               !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
+       return ret;
 }
 
-/**
- * intel_update_watermarks - update FIFO watermark values based on current modes
- *
- * Calculate watermark values for the various WM regs based on current mode
- * and plane configuration.
- *
- * There are several cases to deal with here:
- *   - normal (i.e. non-self-refresh)
- *   - self-refresh (SR) mode
- *   - lines are large relative to FIFO size (buffer can hold up to 2)
- *   - lines are small relative to FIFO size (buffer can hold more than 2
- *     lines), so need to account for TLB latency
- *
- *   The normal calculation is:
- *     watermark = dotclock * bytes per pixel * latency
- *   where latency is platform & configuration dependent (we assume pessimal
- *   values here).
- *
- *   The SR calculation is:
- *     watermark = (trunc(latency/line time)+1) * surface width *
- *       bytes per pixel
- *   where
- *     line time = htotal / dotclock
- *     surface width = hdisplay for normal plane and 64 for cursor
- *   and latency is assumed to be high, as above.
- *
- * The final value programmed to the register should always be rounded up,
- * and include an extra 2 entries to account for clock crossings.
- *
- * We don't use the sprite, so we can ignore that.  And on Crestline we have
- * to set the non-SR watermarks to 8.
- */
-void intel_update_watermarks(struct drm_crtc *crtc)
+static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
+                              uint32_t horiz_pixels, uint8_t bytes_per_pixel,
+                              uint32_t latency)
 {
-       struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+       uint32_t ret, plane_bytes_per_line, wm_intermediate_val;
 
-       if (dev_priv->display.update_wm)
-               dev_priv->display.update_wm(crtc);
-}
+       if (latency == 0)
+               return UINT_MAX;
 
-void intel_update_sprite_watermarks(struct drm_plane *plane,
-                                   struct drm_crtc *crtc,
-                                   uint32_t sprite_width,
-                                   uint32_t sprite_height,
-                                   int pixel_size,
-                                   bool enabled, bool scaled)
-{
-       struct drm_i915_private *dev_priv = plane->dev->dev_private;
+       plane_bytes_per_line = horiz_pixels * bytes_per_pixel;
+       wm_intermediate_val = latency * pixel_rate;
+       ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) *
+                               plane_bytes_per_line;
 
-       if (dev_priv->display.update_sprite_wm)
-               dev_priv->display.update_sprite_wm(plane, crtc,
-                                                  sprite_width, sprite_height,
-                                                  pixel_size, enabled, scaled);
+       return ret;
 }
 
-static struct drm_i915_gem_object *
-intel_alloc_context_page(struct drm_device *dev)
+static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb,
+                                      const struct intel_crtc *intel_crtc)
 {
-       struct drm_i915_gem_object *ctx;
-       int ret;
+       struct drm_device *dev = intel_crtc->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb;
+       enum pipe pipe = intel_crtc->pipe;
 
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+       if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe],
+                  sizeof(new_ddb->plane[pipe])))
+               return true;
 
-       ctx = i915_gem_alloc_object(dev, 4096);
-       if (!ctx) {
-               DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
-               return NULL;
-       }
+       if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe],
+                   sizeof(new_ddb->cursor[pipe])))
+               return true;
 
-       ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
-       if (ret) {
-               DRM_ERROR("failed to pin power context: %d\n", ret);
-               goto err_unref;
+       return false;
+}
+
+static void skl_compute_wm_global_parameters(struct drm_device *dev,
+                                            struct intel_wm_config *config)
+{
+       struct drm_crtc *crtc;
+       struct drm_plane *plane;
+
+       list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+               config->num_pipes_active += intel_crtc_active(crtc);
+
+       /* FIXME: I don't think we need those two global parameters on SKL */
+       list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+               struct intel_plane *intel_plane = to_intel_plane(plane);
+
+               config->sprites_enabled |= intel_plane->wm.enabled;
+               config->sprites_scaled |= intel_plane->wm.scaled;
        }
+}
 
-       ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
-       if (ret) {
-               DRM_ERROR("failed to set-domain on power context: %d\n", ret);
-               goto err_unpin;
+static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
+                                          struct skl_pipe_wm_parameters *p)
+{
+       struct drm_device *dev = crtc->dev;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       enum pipe pipe = intel_crtc->pipe;
+       struct drm_plane *plane;
+       int i = 1; /* Index for sprite planes start */
+
+       p->active = intel_crtc_active(crtc);
+       if (p->active) {
+               p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal;
+               p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config);
+
+               /*
+                * For now, assume primary and cursor planes are always enabled.
+                */
+               p->plane[0].enabled = true;
+               p->plane[0].bytes_per_pixel =
+                       crtc->primary->fb->bits_per_pixel / 8;
+               p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w;
+               p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h;
+
+               p->cursor.enabled = true;
+               p->cursor.bytes_per_pixel = 4;
+               p->cursor.horiz_pixels = intel_crtc->cursor_width ?
+                                        intel_crtc->cursor_width : 64;
        }
 
-       return ctx;
+       list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
+               struct intel_plane *intel_plane = to_intel_plane(plane);
 
-err_unpin:
-       i915_gem_object_ggtt_unpin(ctx);
-err_unref:
-       drm_gem_object_unreference(&ctx->base);
-       return NULL;
+               if (intel_plane->pipe == pipe &&
+                       plane->type == DRM_PLANE_TYPE_OVERLAY)
+                       p->plane[i++] = intel_plane->wm;
+       }
 }
 
-/**
- * Lock protecting IPS related data structures
- */
-DEFINE_SPINLOCK(mchdev_lock);
+static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p,
+                                struct intel_plane_wm_parameters *p_params,
+                                uint16_t ddb_allocation,
+                                uint32_t mem_value,
+                                uint16_t *out_blocks, /* out */
+                                uint8_t *out_lines /* out */)
+{
+       uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines;
+       uint32_t result_bytes;
 
-/* Global for IPS driver to get at the current i915 device. Protected by
- * mchdev_lock. */
-static struct drm_i915_private *i915_mch_dev;
+       if (mem_value == 0 || !p->active || !p_params->enabled)
+               return false;
 
-bool ironlake_set_drps(struct drm_device *dev, u8 val)
+       method1 = skl_wm_method1(p->pixel_rate,
+                                p_params->bytes_per_pixel,
+                                mem_value);
+       method2 = skl_wm_method2(p->pixel_rate,
+                                p->pipe_htotal,
+                                p_params->horiz_pixels,
+                                p_params->bytes_per_pixel,
+                                mem_value);
+
+       plane_bytes_per_line = p_params->horiz_pixels *
+                                       p_params->bytes_per_pixel;
+
+       /* For now xtile and linear */
+       if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1)
+               result_bytes = min(method1, method2);
+       else
+               result_bytes = method1;
+
+       res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1;
+       res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line);
+
+       if (res_blocks > ddb_allocation || res_lines > 31)
+               return false;
+
+       *out_blocks = res_blocks;
+       *out_lines = res_lines;
+
+       return true;
+}
+
+static void skl_compute_wm_level(const struct drm_i915_private *dev_priv,
+                                struct skl_ddb_allocation *ddb,
+                                struct skl_pipe_wm_parameters *p,
+                                enum pipe pipe,
+                                int level,
+                                int num_planes,
+                                struct skl_wm_level *result)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u16 rgvswctl;
+       uint16_t latency = dev_priv->wm.skl_latency[level];
+       uint16_t ddb_blocks;
+       int i;
 
-       assert_spin_locked(&mchdev_lock);
+       for (i = 0; i < num_planes; i++) {
+               ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]);
 
-       rgvswctl = I915_READ16(MEMSWCTL);
-       if (rgvswctl & MEMCTL_CMD_STS) {
-               DRM_DEBUG("gpu busy, RCS change rejected\n");
-               return false; /* still busy with another command */
+               result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i],
+                                               ddb_blocks,
+                                               latency,
+                                               &result->plane_res_b[i],
+                                               &result->plane_res_l[i]);
        }
 
-       rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
-               (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
-       I915_WRITE16(MEMSWCTL, rgvswctl);
-       POSTING_READ16(MEMSWCTL);
+       ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]);
+       result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks,
+                                                latency, &result->cursor_res_b,
+                                                &result->cursor_res_l);
+}
 
-       rgvswctl |= MEMCTL_CMD_STS;
-       I915_WRITE16(MEMSWCTL, rgvswctl);
+static uint32_t
+skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p)
+{
+       if (!intel_crtc_active(crtc))
+               return 0;
+
+       return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate);
 
-       return true;
 }
 
-static void ironlake_enable_drps(struct drm_device *dev)
+static void skl_compute_transition_wm(struct drm_crtc *crtc,
+                                     struct skl_pipe_wm_parameters *params,
+                                     struct skl_wm_level *trans_wm /* out */)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 rgvmodectl = I915_READ(MEMMODECTL);
-       u8 fmax, fmin, fstart, vstart;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int i;
 
-       spin_lock_irq(&mchdev_lock);
+       if (!params->active)
+               return;
 
-       /* Enable temp reporting */
-       I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
-       I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
+       /* Until we know more, just disable transition WMs */
+       for (i = 0; i < intel_num_planes(intel_crtc); i++)
+               trans_wm->plane_en[i] = false;
+       trans_wm->cursor_en = false;
+}
 
-       /* 100ms RC evaluation intervals */
-       I915_WRITE(RCUPEI, 100000);
-       I915_WRITE(RCDNEI, 100000);
+static void skl_compute_pipe_wm(struct drm_crtc *crtc,
+                               struct skl_ddb_allocation *ddb,
+                               struct skl_pipe_wm_parameters *params,
+                               struct skl_pipe_wm *pipe_wm)
+{
+       struct drm_device *dev = crtc->dev;
+       const struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int level, max_level = ilk_wm_max_level(dev);
 
-       /* Set max/min thresholds to 90ms and 80ms respectively */
-       I915_WRITE(RCBMAXAVG, 90000);
-       I915_WRITE(RCBMINAVG, 80000);
+       for (level = 0; level <= max_level; level++) {
+               skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe,
+                                    level, intel_num_planes(intel_crtc),
+                                    &pipe_wm->wm[level]);
+       }
+       pipe_wm->linetime = skl_compute_linetime_wm(crtc, params);
 
-       I915_WRITE(MEMIHYST, 1);
+       skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm);
+}
 
-       /* Set up min, max, and cur for interrupt handling */
-       fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
-       fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
-       fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
-               MEMMODE_FSTART_SHIFT;
+static void skl_compute_wm_results(struct drm_device *dev,
+                                  struct skl_pipe_wm_parameters *p,
+                                  struct skl_pipe_wm *p_wm,
+                                  struct skl_wm_values *r,
+                                  struct intel_crtc *intel_crtc)
+{
+       int level, max_level = ilk_wm_max_level(dev);
+       enum pipe pipe = intel_crtc->pipe;
+       uint32_t temp;
+       int i;
 
-       vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
-               PXVFREQ_PX_SHIFT;
+       for (level = 0; level <= max_level; level++) {
+               for (i = 0; i < intel_num_planes(intel_crtc); i++) {
+                       temp = 0;
 
-       dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
-       dev_priv->ips.fstart = fstart;
+                       temp |= p_wm->wm[level].plane_res_l[i] <<
+                                       PLANE_WM_LINES_SHIFT;
+                       temp |= p_wm->wm[level].plane_res_b[i];
+                       if (p_wm->wm[level].plane_en[i])
+                               temp |= PLANE_WM_EN;
 
-       dev_priv->ips.max_delay = fstart;
-       dev_priv->ips.min_delay = fmin;
-       dev_priv->ips.cur_delay = fstart;
+                       r->plane[pipe][i][level] = temp;
+               }
 
-       DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
-                        fmax, fmin, fstart);
+               temp = 0;
 
-       I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+               temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT;
+               temp |= p_wm->wm[level].cursor_res_b;
 
-       /*
-        * Interrupts will be enabled in ironlake_irq_postinstall
-        */
+               if (p_wm->wm[level].cursor_en)
+                       temp |= PLANE_WM_EN;
 
-       I915_WRITE(VIDSTART, vstart);
-       POSTING_READ(VIDSTART);
+               r->cursor[pipe][level] = temp;
 
-       rgvmodectl |= MEMMODE_SWMODE_EN;
-       I915_WRITE(MEMMODECTL, rgvmodectl);
+       }
 
-       if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
-               DRM_ERROR("stuck trying to change perf mode\n");
-       mdelay(1);
+       /* transition WMs */
+       for (i = 0; i < intel_num_planes(intel_crtc); i++) {
+               temp = 0;
+               temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT;
+               temp |= p_wm->trans_wm.plane_res_b[i];
+               if (p_wm->trans_wm.plane_en[i])
+                       temp |= PLANE_WM_EN;
 
-       ironlake_set_drps(dev, fstart);
+               r->plane_trans[pipe][i] = temp;
+       }
 
-       dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
-               I915_READ(0x112e0);
-       dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-       dev_priv->ips.last_count2 = I915_READ(0x112f4);
-       dev_priv->ips.last_time2 = ktime_get_raw_ns();
+       temp = 0;
+       temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT;
+       temp |= p_wm->trans_wm.cursor_res_b;
+       if (p_wm->trans_wm.cursor_en)
+               temp |= PLANE_WM_EN;
 
-       spin_unlock_irq(&mchdev_lock);
+       r->cursor_trans[pipe] = temp;
+
+       r->wm_linetime[pipe] = p_wm->linetime;
 }
 
-static void ironlake_disable_drps(struct drm_device *dev)
+static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg,
+                               const struct skl_ddb_entry *entry)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       u16 rgvswctl;
+       if (entry->end)
+               I915_WRITE(reg, (entry->end - 1) << 16 | entry->start);
+       else
+               I915_WRITE(reg, 0);
+}
 
-       spin_lock_irq(&mchdev_lock);
+static void skl_write_wm_values(struct drm_i915_private *dev_priv,
+                               const struct skl_wm_values *new)
+{
+       struct drm_device *dev = dev_priv->dev;
+       struct intel_crtc *crtc;
 
-       rgvswctl = I915_READ16(MEMSWCTL);
+       list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+               int i, level, max_level = ilk_wm_max_level(dev);
+               enum pipe pipe = crtc->pipe;
 
-       /* Ack interrupts, disable EFC interrupt */
-       I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
-       I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
-       I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
-       I915_WRITE(DEIIR, DE_PCU_EVENT);
-       I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+               if (!new->dirty[pipe])
+                       continue;
 
-       /* Go back to the starting frequency */
-       ironlake_set_drps(dev, dev_priv->ips.fstart);
-       mdelay(1);
-       rgvswctl |= MEMCTL_CMD_STS;
-       I915_WRITE(MEMSWCTL, rgvswctl);
-       mdelay(1);
+               I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]);
 
-       spin_unlock_irq(&mchdev_lock);
+               for (level = 0; level <= max_level; level++) {
+                       for (i = 0; i < intel_num_planes(crtc); i++)
+                               I915_WRITE(PLANE_WM(pipe, i, level),
+                                          new->plane[pipe][i][level]);
+                       I915_WRITE(CUR_WM(pipe, level),
+                                  new->cursor[pipe][level]);
+               }
+               for (i = 0; i < intel_num_planes(crtc); i++)
+                       I915_WRITE(PLANE_WM_TRANS(pipe, i),
+                                  new->plane_trans[pipe][i]);
+               I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
+
+               for (i = 0; i < intel_num_planes(crtc); i++)
+                       skl_ddb_entry_write(dev_priv,
+                                           PLANE_BUF_CFG(pipe, i),
+                                           &new->ddb.plane[pipe][i]);
+
+               skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
+                                   &new->ddb.cursor[pipe]);
+       }
 }
 
-/* There's a funny hw issue where the hw returns all 0 when reading from
- * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
- * ourselves, instead of doing a rmw cycle (which might result in us clearing
- * all limits and the gpu stuck at whatever frequency it is at atm).
+/*
+ * When setting up a new DDB allocation arrangement, we need to correctly
+ * sequence the times at which the new allocations for the pipes are taken into
+ * account or we'll have pipes fetching from space previously allocated to
+ * another pipe.
+ *
+ * Roughly the sequence looks like:
+ *  1. re-allocate the pipe(s) with the allocation being reduced and not
+ *     overlapping with a previous light-up pipe (another way to put it is:
+ *     pipes with their new allocation strictly included into their old ones).
+ *  2. re-allocate the other pipes that get their allocation reduced
+ *  3. allocate the pipes having their allocation increased
+ *
+ * Steps 1. and 2. are here to take care of the following case:
+ * - Initially DDB looks like this:
+ *     |   B    |   C    |
+ * - enable pipe A.
+ * - pipe B has a reduced DDB allocation that overlaps with the old pipe C
+ *   allocation
+ *     |  A  |  B  |  C  |
+ *
+ * We need to sequence the re-allocation: C, B, A (and not B, C, A).
  */
-static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)
+
+static void
+skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass)
 {
-       u32 limits;
+       struct drm_device *dev = dev_priv->dev;
+       int plane;
 
-       /* Only set the down limit when we've reached the lowest level to avoid
-        * getting more interrupts, otherwise leave this clear. This prevents a
-        * race in the hw when coming out of rc6: There's a tiny window where
-        * the hw runs at the minimal clock before selecting the desired
-        * frequency, if the down threshold expires in that window we will not
-        * receive a down interrupt. */
-       limits = dev_priv->rps.max_freq_softlimit << 24;
-       if (val <= dev_priv->rps.min_freq_softlimit)
-               limits |= dev_priv->rps.min_freq_softlimit << 16;
+       DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass);
 
-       return limits;
+       for_each_plane(pipe, plane) {
+               I915_WRITE(PLANE_SURF(pipe, plane),
+                          I915_READ(PLANE_SURF(pipe, plane)));
+       }
+       I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe)));
 }
 
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+static bool
+skl_ddb_allocation_included(const struct skl_ddb_allocation *old,
+                           const struct skl_ddb_allocation *new,
+                           enum pipe pipe)
 {
-       int new_power;
+       uint16_t old_size, new_size;
 
-       new_power = dev_priv->rps.power;
-       switch (dev_priv->rps.power) {
-       case LOW_POWER:
-               if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
-                       new_power = BETWEEN;
-               break;
+       old_size = skl_ddb_entry_size(&old->pipe[pipe]);
+       new_size = skl_ddb_entry_size(&new->pipe[pipe]);
 
-       case BETWEEN:
-               if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
-                       new_power = LOW_POWER;
-               else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
-                       new_power = HIGH_POWER;
-               break;
+       return old_size != new_size &&
+              new->pipe[pipe].start >= old->pipe[pipe].start &&
+              new->pipe[pipe].end <= old->pipe[pipe].end;
+}
 
-       case HIGH_POWER:
-               if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
-                       new_power = BETWEEN;
-               break;
-       }
-       /* Max/min bins are special */
-       if (val == dev_priv->rps.min_freq_softlimit)
-               new_power = LOW_POWER;
-       if (val == dev_priv->rps.max_freq_softlimit)
-               new_power = HIGH_POWER;
-       if (new_power == dev_priv->rps.power)
-               return;
+static void skl_flush_wm_values(struct drm_i915_private *dev_priv,
+                               struct skl_wm_values *new_values)
+{
+       struct drm_device *dev = dev_priv->dev;
+       struct skl_ddb_allocation *cur_ddb, *new_ddb;
+       bool reallocated[I915_MAX_PIPES] = {false, false, false};
+       struct intel_crtc *crtc;
+       enum pipe pipe;
 
-       /* Note the units here are not exactly 1us, but 1280ns. */
-       switch (new_power) {
-       case LOW_POWER:
-               /* Upclock if more than 95% busy over 16ms */
-               I915_WRITE(GEN6_RP_UP_EI, 12500);
-               I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
+       new_ddb = &new_values->ddb;
+       cur_ddb = &dev_priv->wm.skl_hw.ddb;
 
-               /* Downclock if less than 85% busy over 32ms */
-               I915_WRITE(GEN6_RP_DOWN_EI, 25000);
-               I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
+       /*
+        * First pass: flush the pipes with the new allocation contained into
+        * the old space.
+        *
+        * We'll wait for the vblank on those pipes to ensure we can safely
+        * re-allocate the freed space without this pipe fetching from it.
+        */
+       for_each_intel_crtc(dev, crtc) {
+               if (!crtc->active)
+                       continue;
 
-               I915_WRITE(GEN6_RP_CONTROL,
-                          GEN6_RP_MEDIA_TURBO |
-                          GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                          GEN6_RP_MEDIA_IS_GFX |
-                          GEN6_RP_ENABLE |
-                          GEN6_RP_UP_BUSY_AVG |
-                          GEN6_RP_DOWN_IDLE_AVG);
-               break;
+               pipe = crtc->pipe;
 
-       case BETWEEN:
-               /* Upclock if more than 90% busy over 13ms */
-               I915_WRITE(GEN6_RP_UP_EI, 10250);
-               I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
-
-               /* Downclock if less than 75% busy over 32ms */
-               I915_WRITE(GEN6_RP_DOWN_EI, 25000);
-               I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
-
-               I915_WRITE(GEN6_RP_CONTROL,
-                          GEN6_RP_MEDIA_TURBO |
-                          GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                          GEN6_RP_MEDIA_IS_GFX |
-                          GEN6_RP_ENABLE |
-                          GEN6_RP_UP_BUSY_AVG |
-                          GEN6_RP_DOWN_IDLE_AVG);
-               break;
-
-       case HIGH_POWER:
-               /* Upclock if more than 85% busy over 10ms */
-               I915_WRITE(GEN6_RP_UP_EI, 8000);
-               I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
+               if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe))
+                       continue;
 
-               /* Downclock if less than 60% busy over 32ms */
-               I915_WRITE(GEN6_RP_DOWN_EI, 25000);
-               I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
+               skl_wm_flush_pipe(dev_priv, pipe, 1);
+               intel_wait_for_vblank(dev, pipe);
 
-               I915_WRITE(GEN6_RP_CONTROL,
-                          GEN6_RP_MEDIA_TURBO |
-                          GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                          GEN6_RP_MEDIA_IS_GFX |
-                          GEN6_RP_ENABLE |
-                          GEN6_RP_UP_BUSY_AVG |
-                          GEN6_RP_DOWN_IDLE_AVG);
-               break;
+               reallocated[pipe] = true;
        }
 
-       dev_priv->rps.power = new_power;
-       dev_priv->rps.last_adj = 0;
-}
 
-static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
-{
-       u32 mask = 0;
+       /*
+        * Second pass: flush the pipes that are having their allocation
+        * reduced, but overlapping with a previous allocation.
+        *
+        * Here as well we need to wait for the vblank to make sure the freed
+        * space is not used anymore.
+        */
+       for_each_intel_crtc(dev, crtc) {
+               if (!crtc->active)
+                       continue;
 
-       if (val > dev_priv->rps.min_freq_softlimit)
-               mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
-       if (val < dev_priv->rps.max_freq_softlimit)
-               mask |= GEN6_PM_RP_UP_THRESHOLD;
+               pipe = crtc->pipe;
 
-       mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
-       mask &= dev_priv->pm_rps_events;
+               if (reallocated[pipe])
+                       continue;
 
-       /* IVB and SNB hard hangs on looping batchbuffer
-        * if GEN6_PM_UP_EI_EXPIRED is masked.
+               if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) <
+                   skl_ddb_entry_size(&cur_ddb->pipe[pipe])) {
+                       skl_wm_flush_pipe(dev_priv, pipe, 2);
+                       intel_wait_for_vblank(dev, pipe);
+                       reallocated[pipe] = true;
+               }
+       }
+
+       /*
+        * Third pass: flush the pipes that got more space allocated.
+        *
+        * We don't need to actively wait for the update here, next vblank
+        * will just get more DDB space with the correct WM values.
         */
-       if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
-               mask |= GEN6_PM_RP_UP_EI_EXPIRED;
+       for_each_intel_crtc(dev, crtc) {
+               if (!crtc->active)
+                       continue;
 
-       if (IS_GEN8(dev_priv->dev))
-               mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
+               pipe = crtc->pipe;
 
-       return ~mask;
+               /*
+                * At this point, only the pipes with more space than before are
+                * left to re-allocate.
+                */
+               if (reallocated[pipe])
+                       continue;
+
+               skl_wm_flush_pipe(dev_priv, pipe, 3);
+       }
 }
 
-/* gen6_set_rps is called to update the frequency request, but should also be
- * called when the range (min_delay and max_delay) is modified so that we can
- * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
-void gen6_set_rps(struct drm_device *dev, u8 val)
+static bool skl_update_pipe_wm(struct drm_crtc *crtc,
+                              struct skl_pipe_wm_parameters *params,
+                              struct intel_wm_config *config,
+                              struct skl_ddb_allocation *ddb, /* out */
+                              struct skl_pipe_wm *pipe_wm /* out */)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-       WARN_ON(val > dev_priv->rps.max_freq_softlimit);
-       WARN_ON(val < dev_priv->rps.min_freq_softlimit);
-
-       /* min/max delay may still have been modified so be sure to
-        * write the limits value.
-        */
-       if (val != dev_priv->rps.cur_freq) {
-               gen6_set_rps_thresholds(dev_priv, val);
-
-               if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-                       I915_WRITE(GEN6_RPNSWREQ,
-                                  HSW_FREQUENCY(val));
-               else
-                       I915_WRITE(GEN6_RPNSWREQ,
-                                  GEN6_FREQUENCY(val) |
-                                  GEN6_OFFSET(0) |
-                                  GEN6_AGGRESSIVE_TURBO);
-       }
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-       /* Make sure we continue to get interrupts
-        * until we hit the minimum or maximum frequencies.
-        */
-       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val));
-       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+       skl_compute_wm_pipe_parameters(crtc, params);
+       skl_allocate_pipe_ddb(crtc, config, params, ddb);
+       skl_compute_pipe_wm(crtc, ddb, params, pipe_wm);
 
-       POSTING_READ(GEN6_RPNSWREQ);
+       if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm)))
+               return false;
 
-       dev_priv->rps.cur_freq = val;
-       trace_intel_gpu_freq_change(val * 50);
+       intel_crtc->wm.skl_active = *pipe_wm;
+       return true;
 }
 
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
- *
- * * If Gfx is Idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
-*/
-static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
+static void skl_update_other_pipe_wm(struct drm_device *dev,
+                                    struct drm_crtc *crtc,
+                                    struct intel_wm_config *config,
+                                    struct skl_wm_values *r)
 {
-       struct drm_device *dev = dev_priv->dev;
-
-       /* Latest VLV doesn't need to force the gfx clock */
-       if (dev->pdev->revision >= 0xd) {
-               valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
-               return;
-       }
+       struct intel_crtc *intel_crtc;
+       struct intel_crtc *this_crtc = to_intel_crtc(crtc);
 
        /*
-        * When we are idle.  Drop to min voltage state.
+        * If the WM update hasn't changed the allocation for this_crtc (the
+        * crtc we are currently computing the new WM values for), other
+        * enabled crtcs will keep the same allocation and we don't need to
+        * recompute anything for them.
         */
-
-       if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
+       if (!skl_ddb_allocation_changed(&r->ddb, this_crtc))
                return;
 
-       /* Mask turbo interrupt so that they will not come in between */
-       I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
-
-       vlv_force_gfx_clock(dev_priv, true);
-
-       dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
-
-       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
-                                       dev_priv->rps.min_freq_softlimit);
-
-       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
-                               & GENFREQSTATUS) == 0, 5))
-               DRM_ERROR("timed out waiting for Punit\n");
-
-       vlv_force_gfx_clock(dev_priv, false);
+       /*
+        * Otherwise, because of this_crtc being freshly enabled/disabled, the
+        * other active pipes need new DDB allocation and WM values.
+        */
+       list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list,
+                               base.head) {
+               struct skl_pipe_wm_parameters params = {};
+               struct skl_pipe_wm pipe_wm = {};
+               bool wm_changed;
 
-       I915_WRITE(GEN6_PMINTRMSK,
-                  gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
-}
+               if (this_crtc->pipe == intel_crtc->pipe)
+                       continue;
 
-void gen6_rps_idle(struct drm_i915_private *dev_priv)
-{
-       struct drm_device *dev = dev_priv->dev;
+               if (!intel_crtc->active)
+                       continue;
 
-       mutex_lock(&dev_priv->rps.hw_lock);
-       if (dev_priv->rps.enabled) {
-               if (IS_CHERRYVIEW(dev))
-                       valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
-               else if (IS_VALLEYVIEW(dev))
-                       vlv_set_rps_idle(dev_priv);
-               else
-                       gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
-               dev_priv->rps.last_adj = 0;
-       }
-       mutex_unlock(&dev_priv->rps.hw_lock);
-}
+               wm_changed = skl_update_pipe_wm(&intel_crtc->base,
+                                               &params, config,
+                                               &r->ddb, &pipe_wm);
 
-void gen6_rps_boost(struct drm_i915_private *dev_priv)
-{
-       struct drm_device *dev = dev_priv->dev;
+               /*
+                * If we end up re-computing the other pipe WM values, it's
+                * because it was really needed, so we expect the WM values to
+                * be different.
+                */
+               WARN_ON(!wm_changed);
 
-       mutex_lock(&dev_priv->rps.hw_lock);
-       if (dev_priv->rps.enabled) {
-               if (IS_VALLEYVIEW(dev))
-                       valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
-               else
-                       gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
-               dev_priv->rps.last_adj = 0;
+               skl_compute_wm_results(dev, &params, &pipe_wm, r, intel_crtc);
+               r->dirty[intel_crtc->pipe] = true;
        }
-       mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-void valleyview_set_rps(struct drm_device *dev, u8 val)
+static void skl_update_wm(struct drm_crtc *crtc)
 {
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       struct skl_pipe_wm_parameters params = {};
+       struct skl_wm_values *results = &dev_priv->wm.skl_results;
+       struct skl_pipe_wm pipe_wm = {};
+       struct intel_wm_config config = {};
 
-       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
-       WARN_ON(val > dev_priv->rps.max_freq_softlimit);
-       WARN_ON(val < dev_priv->rps.min_freq_softlimit);
+       memset(results, 0, sizeof(*results));
 
-       if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
-                     "Odd GPU freq value\n"))
-               val &= ~1;
+       skl_compute_wm_global_parameters(dev, &config);
 
-       if (val != dev_priv->rps.cur_freq) {
-               DRM_DEBUG_DRIVER("GPU freq request from %d MHz (%u) to %d MHz (%u)\n",
-                                vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
-                                dev_priv->rps.cur_freq,
-                                vlv_gpu_freq(dev_priv, val), val);
+       if (!skl_update_pipe_wm(crtc, &params, &config,
+                               &results->ddb, &pipe_wm))
+               return;
 
-               vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-       }
+       skl_compute_wm_results(dev, &params, &pipe_wm, results, intel_crtc);
+       results->dirty[intel_crtc->pipe] = true;
 
-       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+       skl_update_other_pipe_wm(dev, crtc, &config, results);
+       skl_write_wm_values(dev_priv, results);
+       skl_flush_wm_values(dev_priv, results);
 
-       dev_priv->rps.cur_freq = val;
-       trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val));
+       /* store the new configuration */
+       dev_priv->wm.skl_hw = *results;
 }
 
-static void gen8_disable_rps_interrupts(struct drm_device *dev)
+static void
+skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
+                    uint32_t sprite_width, uint32_t sprite_height,
+                    int pixel_size, bool enabled, bool scaled)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP);
-       I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) &
-                                  ~dev_priv->pm_rps_events);
-       /* Complete PM interrupt masking here doesn't race with the rps work
-        * item again unmasking PM interrupts because that is using a different
-        * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in
-        * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which
-        * gen8_enable_rps will clean up. */
+       struct intel_plane *intel_plane = to_intel_plane(plane);
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       dev_priv->rps.pm_iir = 0;
-       spin_unlock_irq(&dev_priv->irq_lock);
+       intel_plane->wm.enabled = enabled;
+       intel_plane->wm.scaled = scaled;
+       intel_plane->wm.horiz_pixels = sprite_width;
+       intel_plane->wm.vert_pixels = sprite_height;
+       intel_plane->wm.bytes_per_pixel = pixel_size;
 
-       I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
+       skl_update_wm(crtc);
 }
 
-static void gen6_disable_rps_interrupts(struct drm_device *dev)
+static void ilk_update_wm(struct drm_crtc *crtc)
 {
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       struct ilk_wm_maximums max;
+       struct ilk_pipe_wm_parameters params = {};
+       struct ilk_wm_values results = {};
+       enum intel_ddb_partitioning partitioning;
+       struct intel_pipe_wm pipe_wm = {};
+       struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm;
+       struct intel_wm_config config = {};
 
-       I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
-       I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
-                               ~dev_priv->pm_rps_events);
-       /* Complete PM interrupt masking here doesn't race with the rps work
-        * item again unmasking PM interrupts because that is using a different
-        * register (PMIMR) to mask PM interrupts. The only risk is in leaving
-        * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
-
-       spin_lock_irq(&dev_priv->irq_lock);
-       dev_priv->rps.pm_iir = 0;
-       spin_unlock_irq(&dev_priv->irq_lock);
+       ilk_compute_wm_parameters(crtc, &params);
 
-       I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
-}
+       intel_compute_pipe_wm(crtc, &params, &pipe_wm);
 
-static void gen6_disable_rps(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm)))
+               return;
 
-       I915_WRITE(GEN6_RC_CONTROL, 0);
-       I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
+       intel_crtc->wm.active = pipe_wm;
 
-       if (IS_BROADWELL(dev))
-               gen8_disable_rps_interrupts(dev);
-       else
-               gen6_disable_rps_interrupts(dev);
-}
+       ilk_compute_wm_config(dev, &config);
 
-static void cherryview_disable_rps(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max);
+       ilk_wm_merge(dev, &config, &max, &lp_wm_1_2);
 
-       I915_WRITE(GEN6_RC_CONTROL, 0);
+       /* 5/6 split only in single pipe config on IVB+ */
+       if (INTEL_INFO(dev)->gen >= 7 &&
+           config.num_pipes_active == 1 && config.sprites_enabled) {
+               ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max);
+               ilk_wm_merge(dev, &config, &max, &lp_wm_5_6);
 
-       gen8_disable_rps_interrupts(dev);
-}
+               best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6);
+       } else {
+               best_lp_wm = &lp_wm_1_2;
+       }
 
-static void valleyview_disable_rps(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       partitioning = (best_lp_wm == &lp_wm_1_2) ?
+                      INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
 
-       /* we're doing forcewake before Disabling RC6,
-        * This what the BIOS expects when going into suspend */
-       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+       ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results);
 
-       I915_WRITE(GEN6_RC_CONTROL, 0);
+       ilk_write_wm_values(dev_priv, &results);
+}
 
-       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+static void
+ilk_update_sprite_wm(struct drm_plane *plane,
+                    struct drm_crtc *crtc,
+                    uint32_t sprite_width, uint32_t sprite_height,
+                    int pixel_size, bool enabled, bool scaled)
+{
+       struct drm_device *dev = plane->dev;
+       struct intel_plane *intel_plane = to_intel_plane(plane);
+
+       intel_plane->wm.enabled = enabled;
+       intel_plane->wm.scaled = scaled;
+       intel_plane->wm.horiz_pixels = sprite_width;
+       intel_plane->wm.vert_pixels = sprite_height; /* was sprite_width: typo */
+       intel_plane->wm.bytes_per_pixel = pixel_size;
+
+       /*
+        * IVB workaround: must disable low power watermarks for at least
+        * one frame before enabling scaling.  LP watermarks can be re-enabled
+        * when scaling is disabled.
+        *
+        * WaCxSRDisabledForSpriteScaling:ivb
+        */
+       if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev))
+               intel_wait_for_vblank(dev, intel_plane->pipe);
 
-       gen6_disable_rps_interrupts(dev);
+       ilk_update_wm(crtc);
 }
 
-static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
+static void skl_pipe_wm_active_state(uint32_t val,
+                                    struct skl_pipe_wm *active,
+                                    bool is_transwm,
+                                    bool is_cursor,
+                                    int i,
+                                    int level)
 {
-       if (IS_VALLEYVIEW(dev)) {
-               if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
-                       mode = GEN6_RC_CTL_RC6_ENABLE;
-               else
-                       mode = 0;
+       bool is_enabled = (val & PLANE_WM_EN) != 0;
+
+       if (!is_transwm) {
+               if (!is_cursor) {
+                       active->wm[level].plane_en[i] = is_enabled;
+                       active->wm[level].plane_res_b[i] =
+                                       val & PLANE_WM_BLOCKS_MASK;
+                       active->wm[level].plane_res_l[i] =
+                                       (val >> PLANE_WM_LINES_SHIFT) &
+                                               PLANE_WM_LINES_MASK;
+               } else {
+                       active->wm[level].cursor_en = is_enabled;
+                       active->wm[level].cursor_res_b =
+                                       val & PLANE_WM_BLOCKS_MASK;
+                       active->wm[level].cursor_res_l =
+                                       (val >> PLANE_WM_LINES_SHIFT) &
+                                               PLANE_WM_LINES_MASK;
+               }
+       } else {
+               if (!is_cursor) {
+                       active->trans_wm.plane_en[i] = is_enabled;
+                       active->trans_wm.plane_res_b[i] =
+                                       val & PLANE_WM_BLOCKS_MASK;
+                       active->trans_wm.plane_res_l[i] =
+                                       (val >> PLANE_WM_LINES_SHIFT) &
+                                               PLANE_WM_LINES_MASK;
+               } else {
+                       active->trans_wm.cursor_en = is_enabled;
+                       active->trans_wm.cursor_res_b =
+                                       val & PLANE_WM_BLOCKS_MASK;
+                       active->trans_wm.cursor_res_l =
+                                       (val >> PLANE_WM_LINES_SHIFT) &
+                                               PLANE_WM_LINES_MASK;
+               }
        }
-       DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n",
-                     (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
-                     (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
-                     (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
 }
 
-static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
+static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc)
 {
-       /* No RC6 before Ironlake */
-       if (INTEL_INFO(dev)->gen < 5)
-               return 0;
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct skl_pipe_wm *active = &intel_crtc->wm.skl_active;
+       enum pipe pipe = intel_crtc->pipe;
+       int level, i, max_level;
+       uint32_t temp;
 
-       /* RC6 is only on Ironlake mobile not on desktop */
-       if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev))
-               return 0;
+       max_level = ilk_wm_max_level(dev);
 
-       /* Respect the kernel parameter if it is set */
-       if (enable_rc6 >= 0) {
-               int mask;
+       hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
 
-               if (INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev))
-                       mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
-                              INTEL_RC6pp_ENABLE;
-               else
-                       mask = INTEL_RC6_ENABLE;
+       for (level = 0; level <= max_level; level++) {
+               for (i = 0; i < intel_num_planes(intel_crtc); i++)
+                       hw->plane[pipe][i][level] =
+                                       I915_READ(PLANE_WM(pipe, i, level));
+               hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level));
+       }
 
-               if ((enable_rc6 & mask) != enable_rc6)
-                       DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
-                                     enable_rc6 & mask, enable_rc6, mask);
+       for (i = 0; i < intel_num_planes(intel_crtc); i++)
+               hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i));
+       hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe));
 
-               return enable_rc6 & mask;
-       }
+       if (!intel_crtc_active(crtc))
+               return;
 
-       /* Disable RC6 on Ironlake */
-       if (INTEL_INFO(dev)->gen == 5)
-               return 0;
+       hw->dirty[pipe] = true;
 
-       if (IS_IVYBRIDGE(dev))
-               return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
+       active->linetime = hw->wm_linetime[pipe];
 
-       return INTEL_RC6_ENABLE;
-}
+       for (level = 0; level <= max_level; level++) {
+               for (i = 0; i < intel_num_planes(intel_crtc); i++) {
+                       temp = hw->plane[pipe][i][level];
+                       skl_pipe_wm_active_state(temp, active, false,
+                                               false, i, level);
+               }
+               temp = hw->cursor[pipe][level];
+               skl_pipe_wm_active_state(temp, active, false, true, i, level);
+       }
 
-int intel_enable_rc6(const struct drm_device *dev)
-{
-       return i915.enable_rc6;
+       for (i = 0; i < intel_num_planes(intel_crtc); i++) {
+               temp = hw->plane_trans[pipe][i];
+               skl_pipe_wm_active_state(temp, active, true, false, i, 0);
+       }
+
+       temp = hw->cursor_trans[pipe];
+       skl_pipe_wm_active_state(temp, active, true, true, i, 0);
 }
 
-static void gen8_enable_rps_interrupts(struct drm_device *dev)
+void skl_wm_get_hw_state(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
+       struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
+       struct drm_crtc *crtc;
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       WARN_ON(dev_priv->rps.pm_iir);
-       gen8_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
-       I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events);
-       spin_unlock_irq(&dev_priv->irq_lock);
+       skl_ddb_get_hw_state(dev_priv, ddb);
+       list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+               skl_pipe_wm_get_hw_state(crtc);
 }
 
-static void gen6_enable_rps_interrupts(struct drm_device *dev)
+static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc)
 {
+       struct drm_device *dev = crtc->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
+       struct ilk_wm_values *hw = &dev_priv->wm.hw;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       struct intel_pipe_wm *active = &intel_crtc->wm.active;
+       enum pipe pipe = intel_crtc->pipe;
+       static const unsigned int wm0_pipe_reg[] = {
+               [PIPE_A] = WM0_PIPEA_ILK,
+               [PIPE_B] = WM0_PIPEB_ILK,
+               [PIPE_C] = WM0_PIPEC_IVB,
+       };
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       WARN_ON(dev_priv->rps.pm_iir);
-       gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
-       I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events);
-       spin_unlock_irq(&dev_priv->irq_lock);
-}
+       hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]);
+       if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+               hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe));
 
-static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_cap)
-{
-       /* All of these values are in units of 50MHz */
-       dev_priv->rps.cur_freq          = 0;
-       /* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */
-       dev_priv->rps.rp1_freq          = (rp_state_cap >>  8) & 0xff;
-       dev_priv->rps.rp0_freq          = (rp_state_cap >>  0) & 0xff;
-       dev_priv->rps.min_freq          = (rp_state_cap >> 16) & 0xff;
-       /* XXX: only BYT has a special efficient freq */
-       dev_priv->rps.efficient_freq    = dev_priv->rps.rp1_freq;
-       /* hw_max = RP0 until we check for overclocking */
-       dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
+       active->pipe_enabled = intel_crtc_active(crtc);
 
-       /* Preserve min/max settings in case of re-init */
-       if (dev_priv->rps.max_freq_softlimit == 0)
-               dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+       if (active->pipe_enabled) {
+               u32 tmp = hw->wm_pipe[pipe];
 
-       if (dev_priv->rps.min_freq_softlimit == 0)
-               dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+               /*
+                * For active pipes LP0 watermark is marked as
+                * enabled, and LP1+ watermarks as disabled since
+                * we can't really reverse compute them in case
+                * multiple pipes are active.
+                */
+               active->wm[0].enable = true;
+               active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
+               active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
+               active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
+               active->linetime = hw->wm_linetime[pipe];
+       } else {
+               int level, max_level = ilk_wm_max_level(dev);
+
+               /*
+                * For inactive pipes, all watermark levels
+                * should be marked as enabled but zeroed,
+                * which is what we'd compute them to.
+                */
+               for (level = 0; level <= max_level; level++)
+                       active->wm[level].enable = true;
+       }
 }
 
-static void gen8_enable_rps(struct drm_device *dev)
+void ilk_wm_get_hw_state(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_engine_cs *ring;
-       uint32_t rc6_mask = 0, rp_state_cap;
-       int unused;
+       struct ilk_wm_values *hw = &dev_priv->wm.hw;
+       struct drm_crtc *crtc;
 
-       /* 1a: Software RC state - RC0 */
-       I915_WRITE(GEN6_RC_STATE, 0);
+       for_each_crtc(dev, crtc)
+               ilk_pipe_wm_get_hw_state(crtc);
 
-       /* 1c & 1d: Get forcewake during program sequence. Although the driver
-        * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
-       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+       hw->wm_lp[0] = I915_READ(WM1_LP_ILK);
+       hw->wm_lp[1] = I915_READ(WM2_LP_ILK);
+       hw->wm_lp[2] = I915_READ(WM3_LP_ILK);
 
-       /* 2a: Disable RC states. */
-       I915_WRITE(GEN6_RC_CONTROL, 0);
+       hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK);
+       if (INTEL_INFO(dev)->gen >= 7) {
+               hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB);
+               hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB);
+       }
 
-       rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-       parse_rp_state_cap(dev_priv, rp_state_cap);
+       if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+               hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
+                       INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
+       else if (IS_IVYBRIDGE(dev))
+               hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ?
+                       INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
 
-       /* 2b: Program RC6 thresholds.*/
-       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
-       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
-       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-       for_each_ring(ring, dev_priv, unused)
-               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
-       I915_WRITE(GEN6_RC_SLEEP, 0);
-       if (IS_BROADWELL(dev))
-               I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
-       else
-               I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+       hw->enable_fbc_wm =
+               !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
+}
 
-       /* 3: Enable RC6 */
-       if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
-               rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
-       intel_print_rc6_info(dev, rc6_mask);
-       if (IS_BROADWELL(dev))
-               I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
-                               GEN7_RC_CTL_TO_MODE |
-                               rc6_mask);
-       else
-               I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
-                               GEN6_RC_CTL_EI_MODE(1) |
-                               rc6_mask);
+/**
+ * intel_update_watermarks - update FIFO watermark values based on current modes
+ *
+ * Calculate watermark values for the various WM regs based on current mode
+ * and plane configuration.
+ *
+ * There are several cases to deal with here:
+ *   - normal (i.e. non-self-refresh)
+ *   - self-refresh (SR) mode
+ *   - lines are large relative to FIFO size (buffer can hold up to 2)
+ *   - lines are small relative to FIFO size (buffer can hold more than 2
+ *     lines), so need to account for TLB latency
+ *
+ *   The normal calculation is:
+ *     watermark = dotclock * bytes per pixel * latency
+ *   where latency is platform & configuration dependent (we assume pessimal
+ *   values here).
+ *
+ *   The SR calculation is:
+ *     watermark = (trunc(latency/line time)+1) * surface width *
+ *       bytes per pixel
+ *   where
+ *     line time = htotal / dotclock
+ *     surface width = hdisplay for normal plane and 64 for cursor
+ *   and latency is assumed to be high, as above.
+ *
+ * The final value programmed to the register should always be rounded up,
+ * and include an extra 2 entries to account for clock crossings.
+ *
+ * We don't use the sprite, so we can ignore that.  And on Crestline we have
+ * to set the non-SR watermarks to 8.
+ */
+void intel_update_watermarks(struct drm_crtc *crtc)
+{
+       struct drm_i915_private *dev_priv = crtc->dev->dev_private;
 
-       /* 4 Program defaults and thresholds for RPS*/
-       I915_WRITE(GEN6_RPNSWREQ,
-                  HSW_FREQUENCY(dev_priv->rps.rp1_freq));
-       I915_WRITE(GEN6_RC_VIDEO_FREQ,
-                  HSW_FREQUENCY(dev_priv->rps.rp1_freq));
-       /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
-       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
+       if (dev_priv->display.update_wm)
+               dev_priv->display.update_wm(crtc);
+}
 
-       /* Docs recommend 900MHz, and 300 MHz respectively */
-       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-                  dev_priv->rps.max_freq_softlimit << 24 |
-                  dev_priv->rps.min_freq_softlimit << 16);
+void intel_update_sprite_watermarks(struct drm_plane *plane,
+                                   struct drm_crtc *crtc,
+                                   uint32_t sprite_width,
+                                   uint32_t sprite_height,
+                                   int pixel_size,
+                                   bool enabled, bool scaled)
+{
+       struct drm_i915_private *dev_priv = plane->dev->dev_private;
 
-       I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
-       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
-       I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
-       I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+       if (dev_priv->display.update_sprite_wm)
+               dev_priv->display.update_sprite_wm(plane, crtc,
+                                                  sprite_width, sprite_height,
+                                                  pixel_size, enabled, scaled);
+}
 
-       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+static struct drm_i915_gem_object *
+intel_alloc_context_page(struct drm_device *dev)
+{
+       struct drm_i915_gem_object *ctx;
+       int ret;
 
-       /* 5: Enable RPS */
-       I915_WRITE(GEN6_RP_CONTROL,
-                  GEN6_RP_MEDIA_TURBO |
-                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                  GEN6_RP_MEDIA_IS_GFX |
-                  GEN6_RP_ENABLE |
-                  GEN6_RP_UP_BUSY_AVG |
-                  GEN6_RP_DOWN_IDLE_AVG);
+       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-       /* 6: Ring frequency + overclocking (our driver does this later */
+       ctx = i915_gem_alloc_object(dev, 4096);
+       if (!ctx) {
+               DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
+               return NULL;
+       }
 
-       gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8);
+       ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
+       if (ret) {
+               DRM_ERROR("failed to pin power context: %d\n", ret);
+               goto err_unref;
+       }
 
-       gen8_enable_rps_interrupts(dev);
+       ret = i915_gem_object_set_to_gtt_domain(ctx, 1);
+       if (ret) {
+               DRM_ERROR("failed to set-domain on power context: %d\n", ret);
+               goto err_unpin;
+       }
 
-       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+       return ctx;
+
+err_unpin:
+       i915_gem_object_ggtt_unpin(ctx);
+err_unref:
+       drm_gem_object_unreference(&ctx->base);
+       return NULL;
 }
 
-static void gen6_enable_rps(struct drm_device *dev)
+/**
+ * Lock protecting IPS related data structures
+ */
+DEFINE_SPINLOCK(mchdev_lock);
+
+/* Global for IPS driver to get at the current i915 device. Protected by
+ * mchdev_lock. */
+static struct drm_i915_private *i915_mch_dev;
+
+bool ironlake_set_drps(struct drm_device *dev, u8 val)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_engine_cs *ring;
-       u32 rp_state_cap;
-       u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
-       u32 gtfifodbg;
-       int rc6_mode;
-       int i, ret;
-
-       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       u16 rgvswctl;
 
-       /* Here begins a magic sequence of register writes to enable
-        * auto-downclocking.
-        *
-        * Perhaps there might be some value in exposing these to
-        * userspace...
-        */
-       I915_WRITE(GEN6_RC_STATE, 0);
+       assert_spin_locked(&mchdev_lock);
 
-       /* Clear the DBG now so we don't confuse earlier errors */
-       if ((gtfifodbg = I915_READ(GTFIFODBG))) {
-               DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
-               I915_WRITE(GTFIFODBG, gtfifodbg);
+       rgvswctl = I915_READ16(MEMSWCTL);
+       if (rgvswctl & MEMCTL_CMD_STS) {
+               DRM_DEBUG("gpu busy, RCS change rejected\n");
+               return false; /* still busy with another command */
        }
 
-       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+       rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+               (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+       I915_WRITE16(MEMSWCTL, rgvswctl);
+       POSTING_READ16(MEMSWCTL);
 
-       rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+       rgvswctl |= MEMCTL_CMD_STS;
+       I915_WRITE16(MEMSWCTL, rgvswctl);
 
-       parse_rp_state_cap(dev_priv, rp_state_cap);
+       return true;
+}
 
-       /* disable the counters and set deterministic thresholds */
-       I915_WRITE(GEN6_RC_CONTROL, 0);
+static void ironlake_enable_drps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 rgvmodectl = I915_READ(MEMMODECTL);
+       u8 fmax, fmin, fstart, vstart;
 
-       I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
-       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
-       I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
-       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
-       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
+       spin_lock_irq(&mchdev_lock);
 
-       for_each_ring(ring, dev_priv, i)
-               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
+       /* Enable temp reporting */
+       I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
+       I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
 
-       I915_WRITE(GEN6_RC_SLEEP, 0);
-       I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
-       if (IS_IVYBRIDGE(dev))
-               I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
-       else
-               I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
-       I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
-       I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+       /* 100ms RC evaluation intervals */
+       I915_WRITE(RCUPEI, 100000);
+       I915_WRITE(RCDNEI, 100000);
 
-       /* Check if we are enabling RC6 */
-       rc6_mode = intel_enable_rc6(dev_priv->dev);
-       if (rc6_mode & INTEL_RC6_ENABLE)
-               rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
+       /* Set max/min thresholds to 90ms and 80ms respectively */
+       I915_WRITE(RCBMAXAVG, 90000);
+       I915_WRITE(RCBMINAVG, 80000);
 
-       /* We don't use those on Haswell */
-       if (!IS_HASWELL(dev)) {
-               if (rc6_mode & INTEL_RC6p_ENABLE)
-                       rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+       I915_WRITE(MEMIHYST, 1);
 
-               if (rc6_mode & INTEL_RC6pp_ENABLE)
-                       rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
-       }
+       /* Set up min, max, and cur for interrupt handling */
+       fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+       fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+       fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+               MEMMODE_FSTART_SHIFT;
 
-       intel_print_rc6_info(dev, rc6_mask);
+       vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
+               PXVFREQ_PX_SHIFT;
 
-       I915_WRITE(GEN6_RC_CONTROL,
-                  rc6_mask |
-                  GEN6_RC_CTL_EI_MODE(1) |
-                  GEN6_RC_CTL_HW_ENABLE);
+       dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
+       dev_priv->ips.fstart = fstart;
 
-       /* Power down if completely idle for over 50ms */
-       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
-       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+       dev_priv->ips.max_delay = fstart;
+       dev_priv->ips.min_delay = fmin;
+       dev_priv->ips.cur_delay = fstart;
 
-       ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
-       if (ret)
-               DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
+       DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+                        fmax, fmin, fstart);
 
-       ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
-       if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
-               DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
-                                (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
-                                (pcu_mbox & 0xff) * 50);
-               dev_priv->rps.max_freq = pcu_mbox & 0xff;
-       }
+       I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 
-       dev_priv->rps.power = HIGH_POWER; /* force a reset */
-       gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+       /*
+        * Interrupts will be enabled in ironlake_irq_postinstall
+        */
 
-       gen6_enable_rps_interrupts(dev);
+       I915_WRITE(VIDSTART, vstart);
+       POSTING_READ(VIDSTART);
 
-       rc6vids = 0;
-       ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
-       if (IS_GEN6(dev) && ret) {
-               DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
-       } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
-               DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
-                         GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
-               rc6vids &= 0xffff00;
-               rc6vids |= GEN6_ENCODE_RC6_VID(450);
-               ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
-               if (ret)
-                       DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
-       }
+       rgvmodectl |= MEMMODE_SWMODE_EN;
+       I915_WRITE(MEMMODECTL, rgvmodectl);
 
-       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+       if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+               DRM_ERROR("stuck trying to change perf mode\n");
+       mdelay(1);
+
+       ironlake_set_drps(dev, fstart);
+
+       dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
+               I915_READ(0x112e0);
+       dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
+       dev_priv->ips.last_count2 = I915_READ(0x112f4);
+       dev_priv->ips.last_time2 = ktime_get_raw_ns();
+
+       spin_unlock_irq(&mchdev_lock);
 }
 
-static void __gen6_update_ring_freq(struct drm_device *dev)
+static void ironlake_disable_drps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int min_freq = 15;
-       unsigned int gpu_freq;
-       unsigned int max_ia_freq, min_ring_freq;
-       int scaling_factor = 180;
-       struct cpufreq_policy *policy;
+       u16 rgvswctl;
 
-       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       spin_lock_irq(&mchdev_lock);
 
-       policy = cpufreq_cpu_get(0);
-       if (policy) {
-               max_ia_freq = policy->cpuinfo.max_freq;
-               cpufreq_cpu_put(policy);
-       } else {
-               /*
-                * Default to measured freq if none found, PCU will ensure we
-                * don't go over
-                */
-               max_ia_freq = tsc_khz;
-       }
+       rgvswctl = I915_READ16(MEMSWCTL);
 
-       /* Convert from kHz to MHz */
-       max_ia_freq /= 1000;
+       /* Ack interrupts, disable EFC interrupt */
+       I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
+       I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
+       I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
+       I915_WRITE(DEIIR, DE_PCU_EVENT);
+       I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
 
-       min_ring_freq = I915_READ(DCLK) & 0xf;
-       /* convert DDR frequency from units of 266.6MHz to bandwidth */
-       min_ring_freq = mult_frac(min_ring_freq, 8, 3);
+       /* Go back to the starting frequency */
+       ironlake_set_drps(dev, dev_priv->ips.fstart);
+       mdelay(1);
+       rgvswctl |= MEMCTL_CMD_STS;
+       I915_WRITE(MEMSWCTL, rgvswctl);
+       mdelay(1);
 
-       /*
-        * For each potential GPU frequency, load a ring frequency we'd like
-        * to use for memory access.  We do this by specifying the IA frequency
-        * the PCU should use as a reference to determine the ring frequency.
-        */
-       for (gpu_freq = dev_priv->rps.max_freq_softlimit; gpu_freq >= dev_priv->rps.min_freq_softlimit;
-            gpu_freq--) {
-               int diff = dev_priv->rps.max_freq_softlimit - gpu_freq;
-               unsigned int ia_freq = 0, ring_freq = 0;
+       spin_unlock_irq(&mchdev_lock);
+}
 
-               if (INTEL_INFO(dev)->gen >= 8) {
-                       /* max(2 * GT, DDR). NB: GT is 50MHz units */
-                       ring_freq = max(min_ring_freq, gpu_freq);
-               } else if (IS_HASWELL(dev)) {
-                       ring_freq = mult_frac(gpu_freq, 5, 4);
-                       ring_freq = max(min_ring_freq, ring_freq);
-                       /* leave ia_freq as the default, chosen by cpufreq */
-               } else {
-                       /* On older processors, there is no separate ring
-                        * clock domain, so in order to boost the bandwidth
-                        * of the ring, we need to upclock the CPU (ia_freq).
-                        *
-                        * For GPU frequencies less than 750MHz,
-                        * just use the lowest ring freq.
-                        */
-                       if (gpu_freq < min_freq)
-                               ia_freq = 800;
-                       else
-                               ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
-                       ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
-               }
+/* There's a funny hw issue where the hw returns all 0 when reading from
+ * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value
+ * ourselves, instead of doing a rmw cycle (which might result in us clearing
+ * all limits and the gpu stuck at whatever frequency it is at atm).
+ */
+static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val)
+{
+       u32 limits;
 
-               sandybridge_pcode_write(dev_priv,
-                                       GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
-                                       ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
-                                       ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
-                                       gpu_freq);
-       }
+       /* Only set the down limit when we've reached the lowest level to avoid
+        * getting more interrupts, otherwise leave this clear. This prevents a
+        * race in the hw when coming out of rc6: There's a tiny window where
+        * the hw runs at the minimal clock before selecting the desired
+        * frequency, if the down threshold expires in that window we will not
+        * receive a down interrupt. */
+       limits = dev_priv->rps.max_freq_softlimit << 24;
+       if (val <= dev_priv->rps.min_freq_softlimit)
+               limits |= dev_priv->rps.min_freq_softlimit << 16;
+
+       return limits;
 }
 
-void gen6_update_ring_freq(struct drm_device *dev)
+static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       int new_power;
 
-       if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev))
+       new_power = dev_priv->rps.power;
+       switch (dev_priv->rps.power) {
+       case LOW_POWER:
+               if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq)
+                       new_power = BETWEEN;
+               break;
+
+       case BETWEEN:
+               if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq)
+                       new_power = LOW_POWER;
+               else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq)
+                       new_power = HIGH_POWER;
+               break;
+
+       case HIGH_POWER:
+               if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq)
+                       new_power = BETWEEN;
+               break;
+       }
+       /* Max/min bins are special */
+       if (val == dev_priv->rps.min_freq_softlimit)
+               new_power = LOW_POWER;
+       if (val == dev_priv->rps.max_freq_softlimit)
+               new_power = HIGH_POWER;
+       if (new_power == dev_priv->rps.power)
                return;
 
-       mutex_lock(&dev_priv->rps.hw_lock);
-       __gen6_update_ring_freq(dev);
-       mutex_unlock(&dev_priv->rps.hw_lock);
-}
+       /* Note the units here are not exactly 1us, but 1280ns. */
+       switch (new_power) {
+       case LOW_POWER:
+               /* Upclock if more than 95% busy over 16ms */
+               I915_WRITE(GEN6_RP_UP_EI, 12500);
+               I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800);
 
-static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
-{
-       u32 val, rp0;
+               /* Downclock if less than 85% busy over 32ms */
+               I915_WRITE(GEN6_RP_DOWN_EI, 25000);
+               I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250);
 
-       val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-       rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK;
+               I915_WRITE(GEN6_RP_CONTROL,
+                          GEN6_RP_MEDIA_TURBO |
+                          GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                          GEN6_RP_MEDIA_IS_GFX |
+                          GEN6_RP_ENABLE |
+                          GEN6_RP_UP_BUSY_AVG |
+                          GEN6_RP_DOWN_IDLE_AVG);
+               break;
 
-       return rp0;
-}
+       case BETWEEN:
+               /* Upclock if more than 90% busy over 13ms */
+               I915_WRITE(GEN6_RP_UP_EI, 10250);
+               I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225);
 
-static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-       u32 val, rpe;
+               /* Downclock if less than 75% busy over 32ms */
+               I915_WRITE(GEN6_RP_DOWN_EI, 25000);
+               I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750);
 
-       val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
-       rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+               I915_WRITE(GEN6_RP_CONTROL,
+                          GEN6_RP_MEDIA_TURBO |
+                          GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                          GEN6_RP_MEDIA_IS_GFX |
+                          GEN6_RP_ENABLE |
+                          GEN6_RP_UP_BUSY_AVG |
+                          GEN6_RP_DOWN_IDLE_AVG);
+               break;
 
-       return rpe;
-}
+       case HIGH_POWER:
+               /* Upclock if more than 85% busy over 10ms */
+               I915_WRITE(GEN6_RP_UP_EI, 8000);
+               I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800);
 
-static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-       u32 val, rp1;
+               /* Downclock if less than 60% busy over 32ms */
+               I915_WRITE(GEN6_RP_DOWN_EI, 25000);
+               I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000);
 
-       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-       rp1 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK;
+               I915_WRITE(GEN6_RP_CONTROL,
+                          GEN6_RP_MEDIA_TURBO |
+                          GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                          GEN6_RP_MEDIA_IS_GFX |
+                          GEN6_RP_ENABLE |
+                          GEN6_RP_UP_BUSY_AVG |
+                          GEN6_RP_DOWN_IDLE_AVG);
+               break;
+       }
 
-       return rp1;
+       dev_priv->rps.power = new_power;
+       dev_priv->rps.last_adj = 0;
 }
 
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
+static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
 {
-       u32 val, rpn;
+       u32 mask = 0;
 
-       val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
-       rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK;
-       return rpn;
-}
+       if (val > dev_priv->rps.min_freq_softlimit)
+               mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT;
+       if (val < dev_priv->rps.max_freq_softlimit)
+               mask |= GEN6_PM_RP_UP_THRESHOLD;
 
-static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
-{
-       u32 val, rp1;
+       mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED);
+       mask &= dev_priv->pm_rps_events;
 
-       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
+       /* IVB and SNB hard hangs on looping batchbuffer
+        * if GEN6_PM_UP_EI_EXPIRED is masked.
+        */
+       if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev))
+               mask |= GEN6_PM_RP_UP_EI_EXPIRED;
 
-       rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+       if (IS_GEN8(dev_priv->dev))
+               mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP;
 
-       return rp1;
+       return ~mask;
 }
 
-static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
+/* gen6_set_rps is called to update the frequency request, but should also be
+ * called when the range (min_delay and max_delay) is modified so that we can
+ * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */
+void gen6_set_rps(struct drm_device *dev, u8 val)
 {
-       u32 val, rp0;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       WARN_ON(val > dev_priv->rps.max_freq_softlimit);
+       WARN_ON(val < dev_priv->rps.min_freq_softlimit);
 
-       rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
-       /* Clamp to max */
-       rp0 = min_t(u32, rp0, 0xea);
+       /* min/max delay may still have been modified so be sure to
+        * write the limits value.
+        */
+       if (val != dev_priv->rps.cur_freq) {
+               gen6_set_rps_thresholds(dev_priv, val);
 
-       return rp0;
-}
+               if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+                       I915_WRITE(GEN6_RPNSWREQ,
+                                  HSW_FREQUENCY(val));
+               else
+                       I915_WRITE(GEN6_RPNSWREQ,
+                                  GEN6_FREQUENCY(val) |
+                                  GEN6_OFFSET(0) |
+                                  GEN6_AGGRESSIVE_TURBO);
+       }
 
-static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
-{
-       u32 val, rpe;
+       /* Make sure we continue to get interrupts
+        * until we hit the minimum or maximum frequencies.
+        */
+       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val));
+       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
-       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
-       rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
-       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
-       rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+       POSTING_READ(GEN6_RPNSWREQ);
 
-       return rpe;
+       dev_priv->rps.cur_freq = val;
+       trace_intel_gpu_freq_change(val * 50);
 }
 
-static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
+/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+ *
+ * If Gfx is idle, then
+ * 1. Mask Turbo interrupts
+ * 2. Bring up Gfx clock
+ * 3. Change the freq to Rpn and wait till P-Unit updates freq
+ * 4. Clear the Force GFX CLK ON bit so that Gfx can clock down
+ * 5. Unmask Turbo interrupts
+ */
+static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 {
-       return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
-}
+       struct drm_device *dev = dev_priv->dev;
 
-/* Check that the pctx buffer wasn't move under us. */
-static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
-{
-       unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
+       /* Latest VLV doesn't need to force the gfx clock */
+       if (dev->pdev->revision >= 0xd) {
+               valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+               return;
+       }
 
-       WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
-                            dev_priv->vlv_pctx->stolen->start);
-}
+       /*
+        * When we are idle, drop to the minimum voltage state.
+        */
 
+       if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit)
+               return;
 
-/* Check that the pcbr address is not empty. */
-static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
-{
-       unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
+       /* Mask turbo interrupt so that they will not come in between */
+       I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 
-       WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
-}
+       vlv_force_gfx_clock(dev_priv, true);
 
-static void cherryview_setup_pctx(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       unsigned long pctx_paddr, paddr;
-       struct i915_gtt *gtt = &dev_priv->gtt;
-       u32 pcbr;
-       int pctx_size = 32*1024;
+       dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit;
 
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+       vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ,
+                                       dev_priv->rps.min_freq_softlimit);
 
-       pcbr = I915_READ(VLV_PCBR);
-       if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
-               paddr = (dev_priv->mm.stolen_base +
-                        (gtt->stolen_size - pctx_size));
+       if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
+                               & GENFREQSTATUS) == 0, 100))
+               DRM_ERROR("timed out waiting for Punit\n");
 
-               pctx_paddr = (paddr & (~4095));
-               I915_WRITE(VLV_PCBR, pctx_paddr);
-       }
+       vlv_force_gfx_clock(dev_priv, false);
+
+       I915_WRITE(GEN6_PMINTRMSK,
+                  gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
 }
 
-static void valleyview_setup_pctx(struct drm_device *dev)
+void gen6_rps_idle(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct drm_i915_gem_object *pctx;
-       unsigned long pctx_paddr;
-       u32 pcbr;
-       int pctx_size = 24*1024;
-
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
-       pcbr = I915_READ(VLV_PCBR);
-       if (pcbr) {
-               /* BIOS set it up already, grab the pre-alloc'd space */
-               int pcbr_offset;
-
-               pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
-               pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
-                                                                     pcbr_offset,
-                                                                     I915_GTT_OFFSET_NONE,
-                                                                     pctx_size);
-               goto out;
-       }
+       struct drm_device *dev = dev_priv->dev;
 
-       /*
-        * From the Gunit register HAS:
-        * The Gfx driver is expected to program this register and ensure
-        * proper allocation within Gfx stolen memory.  For example, this
-        * register should be programmed such than the PCBR range does not
-        * overlap with other ranges, such as the frame buffer, protected
-        * memory, or any other relevant ranges.
-        */
-       pctx = i915_gem_object_create_stolen(dev, pctx_size);
-       if (!pctx) {
-               DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
-               return;
+       mutex_lock(&dev_priv->rps.hw_lock);
+       if (dev_priv->rps.enabled) {
+               if (IS_CHERRYVIEW(dev))
+                       valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+               else if (IS_VALLEYVIEW(dev))
+                       vlv_set_rps_idle(dev_priv);
+               else
+                       gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
+               dev_priv->rps.last_adj = 0;
        }
-
-       pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
-       I915_WRITE(VLV_PCBR, pctx_paddr);
-
-out:
-       dev_priv->vlv_pctx = pctx;
+       mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-static void valleyview_cleanup_pctx(struct drm_device *dev)
+void gen6_rps_boost(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       if (WARN_ON(!dev_priv->vlv_pctx))
-               return;
+       struct drm_device *dev = dev_priv->dev;
 
-       drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
-       dev_priv->vlv_pctx = NULL;
+       mutex_lock(&dev_priv->rps.hw_lock);
+       if (dev_priv->rps.enabled) {
+               if (IS_VALLEYVIEW(dev))
+                       valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+               else
+                       gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
+               dev_priv->rps.last_adj = 0;
+       }
+       mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-static void valleyview_init_gt_powersave(struct drm_device *dev)
+void valleyview_set_rps(struct drm_device *dev, u8 val)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 val;
 
-       valleyview_setup_pctx(dev);
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       WARN_ON(val > dev_priv->rps.max_freq_softlimit);
+       WARN_ON(val < dev_priv->rps.min_freq_softlimit);
 
-       mutex_lock(&dev_priv->rps.hw_lock);
+       if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1),
+                     "Odd GPU freq value\n"))
+               val &= ~1;
 
-       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-       switch ((val >> 6) & 3) {
-       case 0:
-       case 1:
-               dev_priv->mem_freq = 800;
-               break;
-       case 2:
-               dev_priv->mem_freq = 1066;
-               break;
-       case 3:
-               dev_priv->mem_freq = 1333;
-               break;
-       }
-       DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
+       if (val != dev_priv->rps.cur_freq)
+               vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 
-       dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
-       dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
-       DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq),
-                        dev_priv->rps.max_freq);
+       I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
-       dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
-       DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
-                        dev_priv->rps.efficient_freq);
+       dev_priv->rps.cur_freq = val;
+       trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val));
+}
 
-       dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
-       DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
-                        dev_priv->rps.rp1_freq);
+static void gen9_disable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
-       DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
-                        dev_priv->rps.min_freq);
+       I915_WRITE(GEN6_RC_CONTROL, 0);
+}
 
-       /* Preserve min/max settings in case of re-init */
-       if (dev_priv->rps.max_freq_softlimit == 0)
-               dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+static void gen6_disable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (dev_priv->rps.min_freq_softlimit == 0)
-               dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+       I915_WRITE(GEN6_RC_CONTROL, 0);
+       I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
+}
 
-       mutex_unlock(&dev_priv->rps.hw_lock);
+static void cherryview_disable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       I915_WRITE(GEN6_RC_CONTROL, 0);
 }
 
-static void cherryview_init_gt_powersave(struct drm_device *dev)
+static void valleyview_disable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 val;
 
-       cherryview_setup_pctx(dev);
+       /* We're doing forcewake before disabling RC6; this is
+        * what the BIOS expects when going into suspend */
+       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
-       mutex_lock(&dev_priv->rps.hw_lock);
+       I915_WRITE(GEN6_RC_CONTROL, 0);
 
-       val = vlv_punit_read(dev_priv, CCK_FUSE_REG);
-       switch ((val >> 2) & 0x7) {
-       case 0:
-       case 1:
-               dev_priv->rps.cz_freq = 200;
-               dev_priv->mem_freq = 1600;
-               break;
-       case 2:
-               dev_priv->rps.cz_freq = 267;
-               dev_priv->mem_freq = 1600;
-               break;
-       case 3:
-               dev_priv->rps.cz_freq = 333;
-               dev_priv->mem_freq = 2000;
-               break;
-       case 4:
-               dev_priv->rps.cz_freq = 320;
-               dev_priv->mem_freq = 1600;
-               break;
-       case 5:
-               dev_priv->rps.cz_freq = 400;
-               dev_priv->mem_freq = 1600;
-               break;
+       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+}
+
+static void intel_print_rc6_info(struct drm_device *dev, u32 mode)
+{
+       if (IS_VALLEYVIEW(dev)) {
+               if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1)))
+                       mode = GEN6_RC_CTL_RC6_ENABLE;
+               else
+                       mode = 0;
        }
-       DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq);
+       if (HAS_RC6p(dev))
+               DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n",
+                             (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off",
+                             (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off",
+                             (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off");
 
-       dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
-       dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
-       DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq),
-                        dev_priv->rps.max_freq);
+       else
+               DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n",
+                             (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off");
+}
 
-       dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
-       DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
-                        dev_priv->rps.efficient_freq);
+static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
+{
+       /* No RC6 before Ironlake */
+       if (INTEL_INFO(dev)->gen < 5)
+               return 0;
 
-       dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
-       DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
-                        dev_priv->rps.rp1_freq);
+       /* RC6 is only on Ironlake mobile not on desktop */
+       if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev))
+               return 0;
 
-       dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
-       DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
-                        dev_priv->rps.min_freq);
+       /* Respect the kernel parameter if it is set */
+       if (enable_rc6 >= 0) {
+               int mask;
 
-       WARN_ONCE((dev_priv->rps.max_freq |
-                  dev_priv->rps.efficient_freq |
-                  dev_priv->rps.rp1_freq |
-                  dev_priv->rps.min_freq) & 1,
-                 "Odd GPU freq values\n");
+               if (HAS_RC6p(dev))
+                       mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE |
+                              INTEL_RC6pp_ENABLE;
+               else
+                       mask = INTEL_RC6_ENABLE;
 
-       /* Preserve min/max settings in case of re-init */
-       if (dev_priv->rps.max_freq_softlimit == 0)
-               dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+               if ((enable_rc6 & mask) != enable_rc6)
+                       DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n",
+                                     enable_rc6 & mask, enable_rc6, mask);
 
-       if (dev_priv->rps.min_freq_softlimit == 0)
-               dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+               return enable_rc6 & mask;
+       }
 
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       /* Disable RC6 on Ironlake */
+       if (INTEL_INFO(dev)->gen == 5)
+               return 0;
+
+       if (IS_IVYBRIDGE(dev))
+               return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
+
+       return INTEL_RC6_ENABLE;
 }
 
-static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
+int intel_enable_rc6(const struct drm_device *dev)
 {
-       valleyview_cleanup_pctx(dev);
+       return i915.enable_rc6;
 }
 
-static void cherryview_enable_rps(struct drm_device *dev)
+static void gen6_init_rps_frequencies(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_engine_cs *ring;
-       u32 gtfifodbg, val, rc6_mode = 0, pcbr;
-       int i;
+       uint32_t rp_state_cap;
+       u32 ddcc_status = 0;
+       int ret;
 
-       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+       rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
+       /* All of these values are in units of 50MHz */
+       dev_priv->rps.cur_freq          = 0;
+       /* static values from HW: RP0 > RP1 > RPn (min_freq) */
+       dev_priv->rps.rp0_freq          = (rp_state_cap >>  0) & 0xff;
+       dev_priv->rps.rp1_freq          = (rp_state_cap >>  8) & 0xff;
+       dev_priv->rps.min_freq          = (rp_state_cap >> 16) & 0xff;
+       /* hw_max = RP0 until we check for overclocking */
+       dev_priv->rps.max_freq          = dev_priv->rps.rp0_freq;
 
-       gtfifodbg = I915_READ(GTFIFODBG);
-       if (gtfifodbg) {
-               DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-                                gtfifodbg);
-               I915_WRITE(GTFIFODBG, gtfifodbg);
+       dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq;
+       if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
+               ret = sandybridge_pcode_read(dev_priv,
+                                       HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+                                       &ddcc_status);
+               if (0 == ret)
+                       dev_priv->rps.efficient_freq =
+                               (ddcc_status >> 8) & 0xff;
        }
 
-       cherryview_check_pctx(dev_priv);
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_freq_softlimit == 0)
+               dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
 
-       /* 1a & 1b: Get forcewake during program sequence. Although the driver
+       if (dev_priv->rps.min_freq_softlimit == 0) {
+               if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+                       dev_priv->rps.min_freq_softlimit =
+                               /* max(RPe, 450 MHz) */
+                               max(dev_priv->rps.efficient_freq, (u8) 9);
+               else
+                       dev_priv->rps.min_freq_softlimit =
+                               dev_priv->rps.min_freq;
+       }
+}
+
+static void gen9_enable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_engine_cs *ring;
+       uint32_t rc6_mask = 0;
+       int unused;
+
+       /* 1a: Software RC state - RC0 */
+       I915_WRITE(GEN6_RC_STATE, 0);
+
+       /* 1b: Get forcewake during program sequence. Although the driver
         * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
        gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
-       /* 2a: Program RC6 thresholds.*/
-       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+       /* 2a: Disable RC states. */
+       I915_WRITE(GEN6_RC_CONTROL, 0);
+
+       /* 2b: Program RC6 thresholds.*/
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
-
-       for_each_ring(ring, dev_priv, i)
+       for_each_ring(ring, dev_priv, unused)
                I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
        I915_WRITE(GEN6_RC_SLEEP, 0);
+       I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
 
-       I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+       /* 3a: Enable RC6 */
+       if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
+               rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+       DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
+                       "on" : "off");
+       I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
+                                  GEN6_RC_CTL_EI_MODE(1) |
+                                  rc6_mask);
 
-       /* allows RC6 residency counter to work */
-       I915_WRITE(VLV_COUNTER_CONTROL,
-                  _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
-                                     VLV_MEDIA_RC6_COUNT_EN |
-                                     VLV_RENDER_RC6_COUNT_EN));
+       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 
-       /* For now we assume BIOS is allocating and populating the PCBR  */
-       pcbr = I915_READ(VLV_PCBR);
+}
+
+static void gen8_enable_rps(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_engine_cs *ring;
+       uint32_t rc6_mask = 0;
+       int unused;
 
-       DRM_DEBUG_DRIVER("PCBR offset : 0x%x\n", pcbr);
+       /* 1a: Software RC state - RC0 */
+       I915_WRITE(GEN6_RC_STATE, 0);
 
-       /* 3: Enable RC6 */
-       if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
-                                               (pcbr >> VLV_PCBR_ADDR_SHIFT))
-               rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+       /* 1c & 1d: Get forcewake during program sequence. Although the driver
+        * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
+       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
-       I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+       /* 2a: Disable RC states. */
+       I915_WRITE(GEN6_RC_CONTROL, 0);
+
+       /* Initialize rps frequencies */
+       gen6_init_rps_frequencies(dev);
+
+       /* 2b: Program RC6 thresholds.*/
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+       for_each_ring(ring, dev_priv, unused)
+               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
+       I915_WRITE(GEN6_RC_SLEEP, 0);
+       if (IS_BROADWELL(dev))
+               I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+       else
+               I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+
+       /* 3: Enable RC6 */
+       if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
+               rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+       intel_print_rc6_info(dev, rc6_mask);
+       if (IS_BROADWELL(dev))
+               I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
+                               GEN7_RC_CTL_TO_MODE |
+                               rc6_mask);
+       else
+               I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE |
+                               GEN6_RC_CTL_EI_MODE(1) |
+                               rc6_mask);
 
        /* 4 Program defaults and thresholds for RPS*/
-       I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-       I915_WRITE(GEN6_RP_UP_EI, 66000);
-       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
+       I915_WRITE(GEN6_RPNSWREQ,
+                  HSW_FREQUENCY(dev_priv->rps.rp1_freq));
+       I915_WRITE(GEN6_RC_VIDEO_FREQ,
+                  HSW_FREQUENCY(dev_priv->rps.rp1_freq));
+       /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */
 
-       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+       /* Docs recommend 900MHz, and 300 MHz respectively */
+       I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
+                  dev_priv->rps.max_freq_softlimit << 24 |
+                  dev_priv->rps.min_freq_softlimit << 16);
 
-       /* WaDisablePwrmtrEvent:chv (pre-production hw) */
-       I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff);
-       I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00);
+       I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */
+       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/
+       I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */
+       I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */
+
+       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
        /* 5: Enable RPS */
        I915_WRITE(GEN6_RP_CONTROL,
+                  GEN6_RP_MEDIA_TURBO |
                   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                  GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */
+                  GEN6_RP_MEDIA_IS_GFX |
                   GEN6_RP_ENABLE |
                   GEN6_RP_UP_BUSY_AVG |
                   GEN6_RP_DOWN_IDLE_AVG);
 
-       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-
-       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
-       DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
-
-       dev_priv->rps.cur_freq = (val >> 8) & 0xff;
-       DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
-                        dev_priv->rps.cur_freq);
-
-       DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
-                        dev_priv->rps.efficient_freq);
-
-       valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
+       /* 6: Ring frequency + overclocking (our driver does this later) */
 
-       gen8_enable_rps_interrupts(dev);
+       dev_priv->rps.power = HIGH_POWER; /* force a reset */
+       gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
 
        gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static void valleyview_enable_rps(struct drm_device *dev)
+static void gen6_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_engine_cs *ring;
-       u32 gtfifodbg, val, rc6_mode = 0;
-       int i;
+       u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
+       u32 gtfifodbg;
+       int rc6_mode;
+       int i, ret;
 
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       valleyview_check_pctx(dev_priv);
+       /* Here begins a magic sequence of register writes to enable
+        * auto-downclocking.
+        *
+        * Perhaps there might be some value in exposing these to
+        * userspace...
+        */
+       I915_WRITE(GEN6_RC_STATE, 0);
 
+       /* Clear the DBG now so we don't confuse earlier errors */
        if ((gtfifodbg = I915_READ(GTFIFODBG))) {
-               DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
-                                gtfifodbg);
+               DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg);
                I915_WRITE(GTFIFODBG, gtfifodbg);
        }
 
-       /* If VLV, Forcewake all wells, else re-direct to regular path */
        gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
-       I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-       I915_WRITE(GEN6_RP_UP_EI, 66000);
-       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
-       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
+       /* Initialize rps frequencies */
+       gen6_init_rps_frequencies(dev);
 
-       I915_WRITE(GEN6_RP_CONTROL,
-                  GEN6_RP_MEDIA_TURBO |
-                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
-                  GEN6_RP_MEDIA_IS_GFX |
-                  GEN6_RP_ENABLE |
-                  GEN6_RP_UP_BUSY_AVG |
-                  GEN6_RP_DOWN_IDLE_CONT);
+       /* disable the counters and set deterministic thresholds */
+       I915_WRITE(GEN6_RC_CONTROL, 0);
 
-       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+       I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+       I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
        I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
        I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
 
        for_each_ring(ring, dev_priv, i)
                I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
 
-       I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
-
-       /* allows RC6 residency counter to work */
-       I915_WRITE(VLV_COUNTER_CONTROL,
-                  _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
-                                     VLV_RENDER_RC0_COUNT_EN |
-                                     VLV_MEDIA_RC6_COUNT_EN |
-                                     VLV_RENDER_RC6_COUNT_EN));
-
-       if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
-               rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
+       I915_WRITE(GEN6_RC_SLEEP, 0);
+       I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
+       if (IS_IVYBRIDGE(dev))
+               I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
+       else
+               I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
+       I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
+       I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
 
-       intel_print_rc6_info(dev, rc6_mode);
+       /* Check if we are enabling RC6 */
+       rc6_mode = intel_enable_rc6(dev_priv->dev);
+       if (rc6_mode & INTEL_RC6_ENABLE)
+               rc6_mask |= GEN6_RC_CTL_RC6_ENABLE;
 
-       I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
+       /* We don't use those on Haswell */
+       if (!IS_HASWELL(dev)) {
+               if (rc6_mode & INTEL_RC6p_ENABLE)
+                       rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
 
-       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+               if (rc6_mode & INTEL_RC6pp_ENABLE)
+                       rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+       }
 
-       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no");
-       DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+       intel_print_rc6_info(dev, rc6_mask);
 
-       dev_priv->rps.cur_freq = (val >> 8) & 0xff;
-       DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
-                        dev_priv->rps.cur_freq);
+       I915_WRITE(GEN6_RC_CONTROL,
+                  rc6_mask |
+                  GEN6_RC_CTL_EI_MODE(1) |
+                  GEN6_RC_CTL_HW_ENABLE);
 
-       DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
-                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
-                        dev_priv->rps.efficient_freq);
+       /* Power down if completely idle for over 50ms */
+       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
+       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
-       valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
+       ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
+       if (ret)
+               DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
 
-       gen6_enable_rps_interrupts(dev);
+       ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox);
+       if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */
+               DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n",
+                                (dev_priv->rps.max_freq_softlimit & 0xff) * 50,
+                                (pcu_mbox & 0xff) * 50);
+               dev_priv->rps.max_freq = pcu_mbox & 0xff;
+       }
 
-       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
-}
+       dev_priv->rps.power = HIGH_POWER; /* force a reset */
+       gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit);
 
-void ironlake_teardown_rc6(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       if (dev_priv->ips.renderctx) {
-               i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
-               drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
-               dev_priv->ips.renderctx = NULL;
+       rc6vids = 0;
+       ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
+       if (IS_GEN6(dev) && ret) {
+               DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+       } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+               DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+                         GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+               rc6vids &= 0xffff00;
+               rc6vids |= GEN6_ENCODE_RC6_VID(450);
+               ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+               if (ret)
+                       DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
        }
 
-       if (dev_priv->ips.pwrctx) {
-               i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
-               drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
-               dev_priv->ips.pwrctx = NULL;
-       }
+       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static void ironlake_disable_rc6(struct drm_device *dev)
+static void __gen6_update_ring_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
+       int min_freq = 15;
+       unsigned int gpu_freq;
+       unsigned int max_ia_freq, min_ring_freq;
+       int scaling_factor = 180;
+       struct cpufreq_policy *policy;
 
-       if (I915_READ(PWRCTXA)) {
-               /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
-               I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
-               wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
-                        50);
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-               I915_WRITE(PWRCTXA, 0);
-               POSTING_READ(PWRCTXA);
+       policy = cpufreq_cpu_get(0);
+       if (policy) {
+               max_ia_freq = policy->cpuinfo.max_freq;
+               cpufreq_cpu_put(policy);
+       } else {
+               /*
+                * Default to measured freq if none found, PCU will ensure we
+                * don't go over
+                */
+               max_ia_freq = tsc_khz;
+       }
 
-               I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
-               POSTING_READ(RSTDBYCTL);
+       /* Convert from kHz to MHz */
+       max_ia_freq /= 1000;
+
+       min_ring_freq = I915_READ(DCLK) & 0xf;
+       /* convert DDR frequency from units of 266.6MHz to bandwidth */
+       min_ring_freq = mult_frac(min_ring_freq, 8, 3);
+
+       /*
+        * For each potential GPU frequency, load a ring frequency we'd like
+        * to use for memory access.  We do this by specifying the IA frequency
+        * the PCU should use as a reference to determine the ring frequency.
+        */
+       for (gpu_freq = dev_priv->rps.max_freq; gpu_freq >= dev_priv->rps.min_freq;
+            gpu_freq--) {
+               int diff = dev_priv->rps.max_freq - gpu_freq;
+               unsigned int ia_freq = 0, ring_freq = 0;
+
+               if (INTEL_INFO(dev)->gen >= 8) {
+                       /* max(2 * GT, DDR). NB: GT is 50MHz units */
+                       ring_freq = max(min_ring_freq, gpu_freq);
+               } else if (IS_HASWELL(dev)) {
+                       ring_freq = mult_frac(gpu_freq, 5, 4);
+                       ring_freq = max(min_ring_freq, ring_freq);
+                       /* leave ia_freq as the default, chosen by cpufreq */
+               } else {
+                       /* On older processors, there is no separate ring
+                        * clock domain, so in order to boost the bandwidth
+                        * of the ring, we need to upclock the CPU (ia_freq).
+                        *
+                        * For GPU frequencies less than 750MHz,
+                        * just use the lowest ring freq.
+                        */
+                       if (gpu_freq < min_freq)
+                               ia_freq = 800;
+                       else
+                               ia_freq = max_ia_freq - ((diff * scaling_factor) / 2);
+                       ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+               }
+
+               sandybridge_pcode_write(dev_priv,
+                                       GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+                                       ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+                                       ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+                                       gpu_freq);
        }
 }
 
-static int ironlake_setup_rc6(struct drm_device *dev)
+void gen6_update_ring_freq(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (dev_priv->ips.renderctx == NULL)
-               dev_priv->ips.renderctx = intel_alloc_context_page(dev);
-       if (!dev_priv->ips.renderctx)
-               return -ENOMEM;
-
-       if (dev_priv->ips.pwrctx == NULL)
-               dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
-       if (!dev_priv->ips.pwrctx) {
-               ironlake_teardown_rc6(dev);
-               return -ENOMEM;
-       }
+       if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev))
+               return;
 
-       return 0;
+       mutex_lock(&dev_priv->rps.hw_lock);
+       __gen6_update_ring_freq(dev);
+       mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-static void ironlake_enable_rc6(struct drm_device *dev)
+static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-       bool was_interruptible;
-       int ret;
+       u32 val, rp0;
 
-       /* rc6 disabled by default due to repeated reports of hanging during
-        * boot and resume.
-        */
-       if (!intel_enable_rc6(dev))
-               return;
+       val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
+       rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK;
 
-       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+       return rp0;
+}
 
-       ret = ironlake_setup_rc6(dev);
-       if (ret)
-               return;
+static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val, rpe;
 
-       was_interruptible = dev_priv->mm.interruptible;
-       dev_priv->mm.interruptible = false;
+       val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG);
+       rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
 
-       /*
-        * GPU can automatically power down the render unit if given a page
-        * to save state.
-        */
-       ret = intel_ring_begin(ring, 6);
-       if (ret) {
-               ironlake_teardown_rc6(dev);
-               dev_priv->mm.interruptible = was_interruptible;
-               return;
-       }
+       return rpe;
+}
 
-       intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
-       intel_ring_emit(ring, MI_SET_CONTEXT);
-       intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
-                       MI_MM_SPACE_GTT |
-                       MI_SAVE_EXT_STATE_EN |
-                       MI_RESTORE_EXT_STATE_EN |
-                       MI_RESTORE_INHIBIT);
-       intel_ring_emit(ring, MI_SUSPEND_FLUSH);
-       intel_ring_emit(ring, MI_NOOP);
-       intel_ring_emit(ring, MI_FLUSH);
-       intel_ring_advance(ring);
+static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val, rp1;
 
-       /*
-        * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
-        * does an implicit flush, combined with MI_FLUSH above, it should be
-        * safe to assume that renderctx is valid
-        */
-       ret = intel_ring_idle(ring);
-       dev_priv->mm.interruptible = was_interruptible;
-       if (ret) {
-               DRM_ERROR("failed to enable ironlake power savings\n");
-               ironlake_teardown_rc6(dev);
-               return;
-       }
+       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+       rp1 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK;
 
-       I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
-       I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
+       return rp1;
+}
 
-       intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
+static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val, rpn;
+
+       val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
+       rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK;
+       return rpn;
 }
 
-static unsigned long intel_pxfreq(u32 vidfreq)
+static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
 {
-       unsigned long freq;
-       int div = (vidfreq & 0x3f0000) >> 16;
-       int post = (vidfreq & 0x3000) >> 12;
-       int pre = (vidfreq & 0x7);
+       u32 val, rp1;
 
-       if (!pre)
-               return 0;
+       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
 
-       freq = ((div * 133333) / ((1<<post) * pre));
+       rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
 
-       return freq;
+       return rp1;
 }
 
-static const struct cparams {
-       u16 i;
-       u16 t;
-       u16 m;
-       u16 c;
-} cparams[] = {
-       { 1, 1333, 301, 28664 },
-       { 1, 1066, 294, 24460 },
-       { 1, 800, 294, 25192 },
-       { 0, 1333, 276, 27605 },
-       { 0, 1066, 276, 27605 },
-       { 0, 800, 231, 23784 },
-};
-
-static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
+static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv)
 {
-       u64 total_count, diff, ret;
-       u32 count1, count2, count3, m = 0, c = 0;
-       unsigned long now = jiffies_to_msecs(jiffies), diff1;
-       int i;
+       u32 val, rp0;
 
-       assert_spin_locked(&mchdev_lock);
+       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE);
 
-       diff1 = now - dev_priv->ips.last_time1;
+       rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+       /* Clamp to max */
+       rp0 = min_t(u32, rp0, 0xea);
 
-       /* Prevent division-by-zero if we are asking too fast.
-        * Also, we don't get interesting results if we are polling
-        * faster than once in 10ms, so just return the saved value
-        * in such cases.
-        */
-       if (diff1 <= 10)
-               return dev_priv->ips.chipset_power;
+       return rp0;
+}
 
-       count1 = I915_READ(DMIEC);
-       count2 = I915_READ(DDREC);
-       count3 = I915_READ(CSIEC);
+static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv)
+{
+       u32 val, rpe;
 
-       total_count = count1 + count2 + count3;
+       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+       rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+       val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+       rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
 
-       /* FIXME: handle per-counter overflow */
-       if (total_count < dev_priv->ips.last_count1) {
-               diff = ~0UL - dev_priv->ips.last_count1;
-               diff += total_count;
-       } else {
-               diff = total_count - dev_priv->ips.last_count1;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(cparams); i++) {
-               if (cparams[i].i == dev_priv->ips.c_m &&
-                   cparams[i].t == dev_priv->ips.r_t) {
-                       m = cparams[i].m;
-                       c = cparams[i].c;
-                       break;
-               }
-       }
-
-       diff = div_u64(diff, diff1);
-       ret = ((m * diff) + c);
-       ret = div_u64(ret, 10);
-
-       dev_priv->ips.last_count1 = total_count;
-       dev_priv->ips.last_time1 = now;
-
-       dev_priv->ips.chipset_power = ret;
-
-       return ret;
+       return rpe;
 }
 
-unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
+static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv)
 {
-       struct drm_device *dev = dev_priv->dev;
-       unsigned long val;
+       return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff;
+}
 
-       if (INTEL_INFO(dev)->gen != 5)
-               return 0;
+/* Check that the pctx buffer wasn't moved under us. */
+static void valleyview_check_pctx(struct drm_i915_private *dev_priv)
+{
+       unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
 
-       spin_lock_irq(&mchdev_lock);
+       WARN_ON(pctx_addr != dev_priv->mm.stolen_base +
+                            dev_priv->vlv_pctx->stolen->start);
+}
 
-       val = __i915_chipset_val(dev_priv);
 
-       spin_unlock_irq(&mchdev_lock);
+/* Check that the pcbr address is not empty. */
+static void cherryview_check_pctx(struct drm_i915_private *dev_priv)
+{
+       unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095;
 
-       return val;
+       WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0);
 }
 
-unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
+static void cherryview_setup_pctx(struct drm_device *dev)
 {
-       unsigned long m, x, b;
-       u32 tsfs;
-
-       tsfs = I915_READ(TSFS);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       unsigned long pctx_paddr, paddr;
+       struct i915_gtt *gtt = &dev_priv->gtt;
+       u32 pcbr;
+       int pctx_size = 32*1024;
 
-       m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
-       x = I915_READ8(TR1);
+       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-       b = tsfs & TSFS_INTR_MASK;
+       pcbr = I915_READ(VLV_PCBR);
+       if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+               DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+               paddr = (dev_priv->mm.stolen_base +
+                        (gtt->stolen_size - pctx_size));
 
-       return ((m * x) / 127) - b;
-}
+               pctx_paddr = (paddr & (~4095));
+               I915_WRITE(VLV_PCBR, pctx_paddr);
+       }
 
-static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
-{
-       struct drm_device *dev = dev_priv->dev;
-       static const struct v_table {
-               u16 vd; /* in .1 mil */
-               u16 vm; /* in .1 mil */
-       } v_table[] = {
-               { 0, 0, },
-               { 375, 0, },
-               { 500, 0, },
-               { 625, 0, },
-               { 750, 0, },
-               { 875, 0, },
-               { 1000, 0, },
-               { 1125, 0, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4125, 3000, },
-               { 4250, 3125, },
-               { 4375, 3250, },
-               { 4500, 3375, },
-               { 4625, 3500, },
-               { 4750, 3625, },
-               { 4875, 3750, },
-               { 5000, 3875, },
-               { 5125, 4000, },
-               { 5250, 4125, },
-               { 5375, 4250, },
-               { 5500, 4375, },
-               { 5625, 4500, },
-               { 5750, 4625, },
-               { 5875, 4750, },
-               { 6000, 4875, },
-               { 6125, 5000, },
-               { 6250, 5125, },
-               { 6375, 5250, },
-               { 6500, 5375, },
-               { 6625, 5500, },
-               { 6750, 5625, },
-               { 6875, 5750, },
-               { 7000, 5875, },
-               { 7125, 6000, },
-               { 7250, 6125, },
-               { 7375, 6250, },
-               { 7500, 6375, },
-               { 7625, 6500, },
-               { 7750, 6625, },
-               { 7875, 6750, },
-               { 8000, 6875, },
-               { 8125, 7000, },
-               { 8250, 7125, },
-               { 8375, 7250, },
-               { 8500, 7375, },
-               { 8625, 7500, },
-               { 8750, 7625, },
-               { 8875, 7750, },
-               { 9000, 7875, },
-               { 9125, 8000, },
-               { 9250, 8125, },
-               { 9375, 8250, },
-               { 9500, 8375, },
-               { 9625, 8500, },
-               { 9750, 8625, },
-               { 9875, 8750, },
-               { 10000, 8875, },
-               { 10125, 9000, },
-               { 10250, 9125, },
-               { 10375, 9250, },
-               { 10500, 9375, },
-               { 10625, 9500, },
-               { 10750, 9625, },
-               { 10875, 9750, },
-               { 11000, 9875, },
-               { 11125, 10000, },
-               { 11250, 10125, },
-               { 11375, 10250, },
-               { 11500, 10375, },
-               { 11625, 10500, },
-               { 11750, 10625, },
-               { 11875, 10750, },
-               { 12000, 10875, },
-               { 12125, 11000, },
-               { 12250, 11125, },
-               { 12375, 11250, },
-               { 12500, 11375, },
-               { 12625, 11500, },
-               { 12750, 11625, },
-               { 12875, 11750, },
-               { 13000, 11875, },
-               { 13125, 12000, },
-               { 13250, 12125, },
-               { 13375, 12250, },
-               { 13500, 12375, },
-               { 13625, 12500, },
-               { 13750, 12625, },
-               { 13875, 12750, },
-               { 14000, 12875, },
-               { 14125, 13000, },
-               { 14250, 13125, },
-               { 14375, 13250, },
-               { 14500, 13375, },
-               { 14625, 13500, },
-               { 14750, 13625, },
-               { 14875, 13750, },
-               { 15000, 13875, },
-               { 15125, 14000, },
-               { 15250, 14125, },
-               { 15375, 14250, },
-               { 15500, 14375, },
-               { 15625, 14500, },
-               { 15750, 14625, },
-               { 15875, 14750, },
-               { 16000, 14875, },
-               { 16125, 15000, },
-       };
-       if (INTEL_INFO(dev)->is_mobile)
-               return v_table[pxvid].vm;
-       else
-               return v_table[pxvid].vd;
+       DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
 }
 
-static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
+static void valleyview_setup_pctx(struct drm_device *dev)
 {
-       u64 now, diff, diffms;
-       u32 count;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *pctx;
+       unsigned long pctx_paddr;
+       u32 pcbr;
+       int pctx_size = 24*1024;
 
-       assert_spin_locked(&mchdev_lock);
+       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-       now = ktime_get_raw_ns();
-       diffms = now - dev_priv->ips.last_time2;
-       do_div(diffms, NSEC_PER_MSEC);
+       pcbr = I915_READ(VLV_PCBR);
+       if (pcbr) {
+               /* BIOS set it up already, grab the pre-alloc'd space */
+               int pcbr_offset;
 
-       /* Don't divide by 0 */
-       if (!diffms)
-               return;
+               pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
+               pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
+                                                                     pcbr_offset,
+                                                                     I915_GTT_OFFSET_NONE,
+                                                                     pctx_size);
+               goto out;
+       }
 
-       count = I915_READ(GFXEC);
+       DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
 
-       if (count < dev_priv->ips.last_count2) {
-               diff = ~0UL - dev_priv->ips.last_count2;
-               diff += count;
-       } else {
-               diff = count - dev_priv->ips.last_count2;
+       /*
+        * From the Gunit register HAS:
+        * The Gfx driver is expected to program this register and ensure
+        * proper allocation within Gfx stolen memory.  For example, this
+        * register should be programmed such that the PCBR range does not
+        * overlap with other ranges, such as the frame buffer, protected
+        * memory, or any other relevant ranges.
+        */
+       pctx = i915_gem_object_create_stolen(dev, pctx_size);
+       if (!pctx) {
+               DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+               return;
        }
 
-       dev_priv->ips.last_count2 = count;
-       dev_priv->ips.last_time2 = now;
+       pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start;
+       I915_WRITE(VLV_PCBR, pctx_paddr);
 
-       /* More magic constants... */
-       diff = diff * 1181;
-       diff = div_u64(diff, diffms * 10);
-       dev_priv->ips.gfx_power = diff;
+out:
+       DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
+       dev_priv->vlv_pctx = pctx;
 }
 
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+static void valleyview_cleanup_pctx(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (INTEL_INFO(dev)->gen != 5)
+       if (WARN_ON(!dev_priv->vlv_pctx))
                return;
 
-       spin_lock_irq(&mchdev_lock);
-
-       __i915_update_gfx_val(dev_priv);
-
-       spin_unlock_irq(&mchdev_lock);
+       drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
+       dev_priv->vlv_pctx = NULL;
 }
 
-static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
+static void valleyview_init_gt_powersave(struct drm_device *dev)
 {
-       unsigned long t, corr, state1, corr2, state2;
-       u32 pxvid, ext_v;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 val;
 
-       assert_spin_locked(&mchdev_lock);
+       valleyview_setup_pctx(dev);
 
-       pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4));
-       pxvid = (pxvid >> 24) & 0x7f;
-       ext_v = pvid_to_extvid(dev_priv, pxvid);
+       mutex_lock(&dev_priv->rps.hw_lock);
 
-       state1 = ext_v;
+       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+       switch ((val >> 6) & 3) {
+       case 0:
+       case 1:
+               dev_priv->mem_freq = 800;
+               break;
+       case 2:
+               dev_priv->mem_freq = 1066;
+               break;
+       case 3:
+               dev_priv->mem_freq = 1333;
+               break;
+       }
+       DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
 
-       t = i915_mch_val(dev_priv);
+       dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv);
+       dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
+       DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq),
+                        dev_priv->rps.max_freq);
 
-       /* Revel in the empirically derived constants */
+       dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv);
+       DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
+                        dev_priv->rps.efficient_freq);
 
-       /* Correction factor in 1/100000 units */
-       if (t > 80)
-               corr = ((t * 2349) + 135940);
-       else if (t >= 50)
-               corr = ((t * 964) + 29317);
-       else /* < 50 */
-               corr = ((t * 301) + 1004);
+       dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv);
+       DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
+                        dev_priv->rps.rp1_freq);
 
-       corr = corr * ((150142 * state1) / 10000 - 78642);
-       corr /= 100000;
-       corr2 = (corr * dev_priv->ips.corr);
+       dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv);
+       DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
+                        dev_priv->rps.min_freq);
 
-       state2 = (corr2 * state1) / 10000;
-       state2 /= 100; /* convert to mW */
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_freq_softlimit == 0)
+               dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
 
-       __i915_update_gfx_val(dev_priv);
+       if (dev_priv->rps.min_freq_softlimit == 0)
+               dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
 
-       return dev_priv->ips.gfx_power + state2;
+       mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
-unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
+static void cherryview_init_gt_powersave(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
-       unsigned long val;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 val;
 
-       if (INTEL_INFO(dev)->gen != 5)
-               return 0;
+       cherryview_setup_pctx(dev);
 
-       spin_lock_irq(&mchdev_lock);
+       mutex_lock(&dev_priv->rps.hw_lock);
 
-       val = __i915_gfx_val(dev_priv);
+       mutex_lock(&dev_priv->dpio_lock);
+       val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
+       mutex_unlock(&dev_priv->dpio_lock);
 
-       spin_unlock_irq(&mchdev_lock);
+       switch ((val >> 2) & 0x7) {
+       case 0:
+       case 1:
+               dev_priv->rps.cz_freq = 200;
+               dev_priv->mem_freq = 1600;
+               break;
+       case 2:
+               dev_priv->rps.cz_freq = 267;
+               dev_priv->mem_freq = 1600;
+               break;
+       case 3:
+               dev_priv->rps.cz_freq = 333;
+               dev_priv->mem_freq = 2000;
+               break;
+       case 4:
+               dev_priv->rps.cz_freq = 320;
+               dev_priv->mem_freq = 1600;
+               break;
+       case 5:
+               dev_priv->rps.cz_freq = 400;
+               dev_priv->mem_freq = 1600;
+               break;
+       }
+       DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq);
 
-       return val;
-}
+       dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv);
+       dev_priv->rps.rp0_freq = dev_priv->rps.max_freq;
+       DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq),
+                        dev_priv->rps.max_freq);
 
-/**
- * i915_read_mch_val - return value for IPS use
- *
- * Calculate and return a value for the IPS driver to use when deciding whether
- * we have thermal and power headroom to increase CPU or GPU power budget.
- */
-unsigned long i915_read_mch_val(void)
-{
-       struct drm_i915_private *dev_priv;
-       unsigned long chipset_val, graphics_val, ret = 0;
+       dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv);
+       DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
+                        dev_priv->rps.efficient_freq);
 
-       spin_lock_irq(&mchdev_lock);
-       if (!i915_mch_dev)
-               goto out_unlock;
-       dev_priv = i915_mch_dev;
+       dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv);
+       DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
+                        dev_priv->rps.rp1_freq);
 
-       chipset_val = __i915_chipset_val(dev_priv);
-       graphics_val = __i915_gfx_val(dev_priv);
+       dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+       DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq),
+                        dev_priv->rps.min_freq);
 
-       ret = chipset_val + graphics_val;
+       WARN_ONCE((dev_priv->rps.max_freq |
+                  dev_priv->rps.efficient_freq |
+                  dev_priv->rps.rp1_freq |
+                  dev_priv->rps.min_freq) & 1,
+                 "Odd GPU freq values\n");
 
-out_unlock:
-       spin_unlock_irq(&mchdev_lock);
+       /* Preserve min/max settings in case of re-init */
+       if (dev_priv->rps.max_freq_softlimit == 0)
+               dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
 
-       return ret;
+       if (dev_priv->rps.min_freq_softlimit == 0)
+               dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+
+       mutex_unlock(&dev_priv->rps.hw_lock);
 }
-EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
+static void valleyview_cleanup_gt_powersave(struct drm_device *dev)
 {
-       struct drm_i915_private *dev_priv;
-       bool ret = true;
-
-       spin_lock_irq(&mchdev_lock);
-       if (!i915_mch_dev) {
-               ret = false;
-               goto out_unlock;
-       }
-       dev_priv = i915_mch_dev;
-
-       if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
-               dev_priv->ips.max_delay--;
-
-out_unlock:
-       spin_unlock_irq(&mchdev_lock);
-
-       return ret;
+       valleyview_cleanup_pctx(dev);
 }
-EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
-/**
- * i915_gpu_lower - lower GPU frequency limit
- *
- * IPS indicates we're close to a thermal limit, so throttle back the GPU
- * frequency maximum.
- */
-bool i915_gpu_lower(void)
+static void cherryview_enable_rps(struct drm_device *dev)
 {
-       struct drm_i915_private *dev_priv;
-       bool ret = true;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_engine_cs *ring;
+       u32 gtfifodbg, val, rc6_mode = 0, pcbr;
+       int i;
 
-       spin_lock_irq(&mchdev_lock);
-       if (!i915_mch_dev) {
-               ret = false;
-               goto out_unlock;
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+       gtfifodbg = I915_READ(GTFIFODBG);
+       if (gtfifodbg) {
+               DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
+                                gtfifodbg);
+               I915_WRITE(GTFIFODBG, gtfifodbg);
        }
-       dev_priv = i915_mch_dev;
 
-       if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
-               dev_priv->ips.max_delay++;
+       cherryview_check_pctx(dev_priv);
 
-out_unlock:
-       spin_unlock_irq(&mchdev_lock);
+       /* 1a & 1b: Get forcewake during program sequence. Although the driver
+        * hasn't enabled a state yet where we need forcewake, BIOS may have.*/
+       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
-       return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_lower);
+       /* 2a: Program RC6 thresholds.*/
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
 
-/**
- * i915_gpu_busy - indicate GPU business to IPS
- *
- * Tell the IPS driver whether or not the GPU is busy.
- */
-bool i915_gpu_busy(void)
-{
-       struct drm_i915_private *dev_priv;
-       struct intel_engine_cs *ring;
-       bool ret = false;
-       int i;
+       for_each_ring(ring, dev_priv, i)
+               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
+       I915_WRITE(GEN6_RC_SLEEP, 0);
 
-       spin_lock_irq(&mchdev_lock);
-       if (!i915_mch_dev)
-               goto out_unlock;
-       dev_priv = i915_mch_dev;
+       /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */
+       I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
 
-       for_each_ring(ring, dev_priv, i)
-               ret |= !list_empty(&ring->request_list);
+       /* allows RC6 residency counter to work */
+       I915_WRITE(VLV_COUNTER_CONTROL,
+                  _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+                                     VLV_MEDIA_RC6_COUNT_EN |
+                                     VLV_RENDER_RC6_COUNT_EN));
 
-out_unlock:
-       spin_unlock_irq(&mchdev_lock);
+       /* For now we assume BIOS is allocating and populating the PCBR  */
+       pcbr = I915_READ(VLV_PCBR);
 
-       return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_busy);
+       /* 3: Enable RC6 */
+       if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) &&
+                                               (pcbr >> VLV_PCBR_ADDR_SHIFT))
+               rc6_mode = GEN7_RC_CTL_TO_MODE;
 
-/**
- * i915_gpu_turbo_disable - disable graphics turbo
- *
- * Disable graphics turbo by resetting the max frequency and setting the
- * current frequency to the default.
- */
-bool i915_gpu_turbo_disable(void)
-{
-       struct drm_i915_private *dev_priv;
-       bool ret = true;
+       I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
 
-       spin_lock_irq(&mchdev_lock);
-       if (!i915_mch_dev) {
-               ret = false;
-               goto out_unlock;
-       }
-       dev_priv = i915_mch_dev;
+       /* 4 Program defaults and thresholds for RPS*/
+       I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+       I915_WRITE(GEN6_RP_UP_EI, 66000);
+       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
-       dev_priv->ips.max_delay = dev_priv->ips.fstart;
+       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
 
-       if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
-               ret = false;
+       /* WaDisablePwrmtrEvent:chv (pre-production hw) */
+       I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff);
+       I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00);
 
-out_unlock:
-       spin_unlock_irq(&mchdev_lock);
+       /* 5: Enable RPS */
+       I915_WRITE(GEN6_RP_CONTROL,
+                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                  GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */
+                  GEN6_RP_ENABLE |
+                  GEN6_RP_UP_BUSY_AVG |
+                  GEN6_RP_DOWN_IDLE_AVG);
 
-       return ret;
-}
-EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
-/**
- * Tells the intel_ips driver that the i915 driver is now loaded, if
- * IPS got loaded first.
- *
- * This awkward dance is so that neither module has to depend on the
- * other in order for IPS to do the appropriate communication of
- * GPU turbo limits to i915.
- */
-static void
-ips_ping_for_i915_load(void)
-{
-       void (*link)(void);
+       /* RPS code assumes GPLL is used */
+       WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
-       link = symbol_get(ips_link_to_i915_driver);
-       if (link) {
-               link();
-               symbol_put(ips_link_to_i915_driver);
-       }
-}
+       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
+       DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
-void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
-{
-       /* We only register the i915 ips part with intel-ips once everything is
-        * set up, to avoid intel-ips sneaking in and reading bogus values. */
-       spin_lock_irq(&mchdev_lock);
-       i915_mch_dev = dev_priv;
-       spin_unlock_irq(&mchdev_lock);
+       dev_priv->rps.cur_freq = (val >> 8) & 0xff;
+       DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
+                        dev_priv->rps.cur_freq);
 
-       ips_ping_for_i915_load();
-}
+       DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
+                        dev_priv->rps.efficient_freq);
 
-void intel_gpu_ips_teardown(void)
-{
-       spin_lock_irq(&mchdev_lock);
-       i915_mch_dev = NULL;
-       spin_unlock_irq(&mchdev_lock);
+       valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
+
+       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static void intel_init_emon(struct drm_device *dev)
+static void valleyview_enable_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 lcfuse;
-       u8 pxw[16];
+       struct intel_engine_cs *ring;
+       u32 gtfifodbg, val, rc6_mode = 0;
        int i;
 
-       /* Disable to program */
-       I915_WRITE(ECR, 0);
-       POSTING_READ(ECR);
-
-       /* Program energy weights for various events */
-       I915_WRITE(SDEW, 0x15040d00);
-       I915_WRITE(CSIEW0, 0x007f0000);
-       I915_WRITE(CSIEW1, 0x1e220004);
-       I915_WRITE(CSIEW2, 0x04000004);
+       WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-       for (i = 0; i < 5; i++)
-               I915_WRITE(PEW + (i * 4), 0);
-       for (i = 0; i < 3; i++)
-               I915_WRITE(DEW + (i * 4), 0);
+       valleyview_check_pctx(dev_priv);
 
-       /* Program P-state weights to account for frequency power adjustment */
-       for (i = 0; i < 16; i++) {
-               u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
-               unsigned long freq = intel_pxfreq(pxvidfreq);
-               unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
-                       PXVFREQ_PX_SHIFT;
-               unsigned long val;
-
-               val = vid * vid;
-               val *= (freq / 1000);
-               val *= 255;
-               val /= (127*127*900);
-               if (val > 0xff)
-                       DRM_ERROR("bad pxval: %ld\n", val);
-               pxw[i] = val;
-       }
-       /* Render standby states get 0 weight */
-       pxw[14] = 0;
-       pxw[15] = 0;
-
-       for (i = 0; i < 4; i++) {
-               u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
-                       (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-               I915_WRITE(PXW + (i * 4), val);
-       }
-
-       /* Adjust magic regs to magic values (more experimental results) */
-       I915_WRITE(OGW0, 0);
-       I915_WRITE(OGW1, 0);
-       I915_WRITE(EG0, 0x00007f00);
-       I915_WRITE(EG1, 0x0000000e);
-       I915_WRITE(EG2, 0x000e0000);
-       I915_WRITE(EG3, 0x68000300);
-       I915_WRITE(EG4, 0x42000000);
-       I915_WRITE(EG5, 0x00140031);
-       I915_WRITE(EG6, 0);
-       I915_WRITE(EG7, 0);
-
-       for (i = 0; i < 8; i++)
-               I915_WRITE(PXWL + (i * 4), 0);
-
-       /* Enable PMON + select events */
-       I915_WRITE(ECR, 0x80000019);
-
-       lcfuse = I915_READ(LCFUSE02);
-
-       dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
-void intel_init_gt_powersave(struct drm_device *dev)
-{
-       i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
-
-       if (IS_CHERRYVIEW(dev))
-               cherryview_init_gt_powersave(dev);
-       else if (IS_VALLEYVIEW(dev))
-               valleyview_init_gt_powersave(dev);
-}
-
-void intel_cleanup_gt_powersave(struct drm_device *dev)
-{
-       if (IS_CHERRYVIEW(dev))
-               return;
-       else if (IS_VALLEYVIEW(dev))
-               valleyview_cleanup_gt_powersave(dev);
-}
-
-/**
- * intel_suspend_gt_powersave - suspend PM work and helper threads
- * @dev: drm device
- *
- * We don't want to disable RC6 or other features here, we just want
- * to make sure any work we've queued has finished and won't bother
- * us while we're suspended.
- */
-void intel_suspend_gt_powersave(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /* Interrupts should be disabled already to avoid re-arming. */
-       WARN_ON(intel_irqs_enabled(dev_priv));
-
-       flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
-       cancel_work_sync(&dev_priv->rps.work);
-
-       /* Force GPU to min freq during suspend */
-       gen6_rps_idle(dev_priv);
-}
-
-void intel_disable_gt_powersave(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /* Interrupts should be disabled already to avoid re-arming. */
-       WARN_ON(intel_irqs_enabled(dev_priv));
-
-       if (IS_IRONLAKE_M(dev)) {
-               ironlake_disable_drps(dev);
-               ironlake_disable_rc6(dev);
-       } else if (INTEL_INFO(dev)->gen >= 6) {
-               intel_suspend_gt_powersave(dev);
-
-               mutex_lock(&dev_priv->rps.hw_lock);
-               if (IS_CHERRYVIEW(dev))
-                       cherryview_disable_rps(dev);
-               else if (IS_VALLEYVIEW(dev))
-                       valleyview_disable_rps(dev);
-               else
-                       gen6_disable_rps(dev);
-               dev_priv->rps.enabled = false;
-               mutex_unlock(&dev_priv->rps.hw_lock);
-       }
-}
-
-static void intel_gen6_powersave_work(struct work_struct *work)
-{
-       struct drm_i915_private *dev_priv =
-               container_of(work, struct drm_i915_private,
-                            rps.delayed_resume_work.work);
-       struct drm_device *dev = dev_priv->dev;
-
-       mutex_lock(&dev_priv->rps.hw_lock);
-
-       if (IS_CHERRYVIEW(dev)) {
-               cherryview_enable_rps(dev);
-       } else if (IS_VALLEYVIEW(dev)) {
-               valleyview_enable_rps(dev);
-       } else if (IS_BROADWELL(dev)) {
-               gen8_enable_rps(dev);
-               __gen6_update_ring_freq(dev);
-       } else {
-               gen6_enable_rps(dev);
-               __gen6_update_ring_freq(dev);
-       }
-       dev_priv->rps.enabled = true;
-       mutex_unlock(&dev_priv->rps.hw_lock);
-
-       intel_runtime_pm_put(dev_priv);
-}
-
-void intel_enable_gt_powersave(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       if (IS_IRONLAKE_M(dev)) {
-               mutex_lock(&dev->struct_mutex);
-               ironlake_enable_drps(dev);
-               ironlake_enable_rc6(dev);
-               intel_init_emon(dev);
-               mutex_unlock(&dev->struct_mutex);
-       } else if (INTEL_INFO(dev)->gen >= 6) {
-               /*
-                * PCU communication is slow and this doesn't need to be
-                * done at any specific time, so do this out of our fast path
-                * to make resume and init faster.
-                *
-                * We depend on the HW RC6 power context save/restore
-                * mechanism when entering D3 through runtime PM suspend. So
-                * disable RPM until RPS/RC6 is properly setup. We can only
-                * get here via the driver load/system resume/runtime resume
-                * paths, so the _noresume version is enough (and in case of
-                * runtime resume it's necessary).
-                */
-               if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
-                                          round_jiffies_up_relative(HZ)))
-                       intel_runtime_pm_get_noresume(dev_priv);
+       if ((gtfifodbg = I915_READ(GTFIFODBG))) {
+               DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n",
+                                gtfifodbg);
+               I915_WRITE(GTFIFODBG, gtfifodbg);
        }
-}
-
-void intel_reset_gt_powersave(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       dev_priv->rps.enabled = false;
-       intel_enable_gt_powersave(dev);
-}
 
-static void ibx_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /*
-        * On Ibex Peak and Cougar Point, we need to disable clock
-        * gating for the panel power sequencer or it will fail to
-        * start up when no ports are active.
-        */
-       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
-}
-
-static void g4x_disable_trickle_feed(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       int pipe;
+       /* If VLV, Forcewake all wells, else re-direct to regular path */
+       gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
-       for_each_pipe(dev_priv, pipe) {
-               I915_WRITE(DSPCNTR(pipe),
-                          I915_READ(DSPCNTR(pipe)) |
-                          DISPPLANE_TRICKLE_FEED_DISABLE);
-               intel_flush_primary_plane(dev_priv, pipe);
-       }
-}
+       I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
+       I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
+       I915_WRITE(GEN6_RP_UP_EI, 66000);
+       I915_WRITE(GEN6_RP_DOWN_EI, 350000);
 
-static void ilk_init_lp_watermarks(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+       I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
 
-       I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
-       I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
-       I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
+       I915_WRITE(GEN6_RP_CONTROL,
+                  GEN6_RP_MEDIA_TURBO |
+                  GEN6_RP_MEDIA_HW_NORMAL_MODE |
+                  GEN6_RP_MEDIA_IS_GFX |
+                  GEN6_RP_ENABLE |
+                  GEN6_RP_UP_BUSY_AVG |
+                  GEN6_RP_DOWN_IDLE_CONT);
 
-       /*
-        * Don't touch WM1S_LP_EN here.
-        * Doing so could cause underruns.
-        */
-}
+       I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+       I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000);
+       I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25);
 
-static void ironlake_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
+       for_each_ring(ring, dev_priv, i)
+               I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10);
 
-       /*
-        * Required for FBC
-        * WaFbcDisableDpfcClockGating:ilk
-        */
-       dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
-                  ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
-                  ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
+       I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
 
-       I915_WRITE(PCH_3DCGDIS0,
-                  MARIUNIT_CLOCK_GATE_DISABLE |
-                  SVSMUNIT_CLOCK_GATE_DISABLE);
-       I915_WRITE(PCH_3DCGDIS1,
-                  VFMUNIT_CLOCK_GATE_DISABLE);
+       /* allows RC6 residency counter to work */
+       I915_WRITE(VLV_COUNTER_CONTROL,
+                  _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
+                                     VLV_RENDER_RC0_COUNT_EN |
+                                     VLV_MEDIA_RC6_COUNT_EN |
+                                     VLV_RENDER_RC6_COUNT_EN));
 
-       /*
-        * According to the spec the following bits should be set in
-        * order to enable memory self-refresh
-        * The bit 22/21 of 0x42004
-        * The bit 5 of 0x42020
-        * The bit 15 of 0x45000
-        */
-       I915_WRITE(ILK_DISPLAY_CHICKEN2,
-                  (I915_READ(ILK_DISPLAY_CHICKEN2) |
-                   ILK_DPARB_GATE | ILK_VSDPFD_FULL));
-       dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
-       I915_WRITE(DISP_ARB_CTL,
-                  (I915_READ(DISP_ARB_CTL) |
-                   DISP_FBC_WM_DIS));
+       if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
+               rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
 
-       ilk_init_lp_watermarks(dev);
+       intel_print_rc6_info(dev, rc6_mode);
 
-       /*
-        * Based on the document from hardware guys the following bits
-        * should be set unconditionally in order to enable FBC.
-        * The bit 22 of 0x42000
-        * The bit 22 of 0x42004
-        * The bit 7,8,9 of 0x42020.
-        */
-       if (IS_IRONLAKE_M(dev)) {
-               /* WaFbcAsynchFlipDisableFbcQueue:ilk */
-               I915_WRITE(ILK_DISPLAY_CHICKEN1,
-                          I915_READ(ILK_DISPLAY_CHICKEN1) |
-                          ILK_FBCQ_DIS);
-               I915_WRITE(ILK_DISPLAY_CHICKEN2,
-                          I915_READ(ILK_DISPLAY_CHICKEN2) |
-                          ILK_DPARB_GATE);
-       }
+       I915_WRITE(GEN6_RC_CONTROL, rc6_mode);
 
-       I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
+       val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
-       I915_WRITE(ILK_DISPLAY_CHICKEN2,
-                  I915_READ(ILK_DISPLAY_CHICKEN2) |
-                  ILK_ELPIN_409_SELECT);
-       I915_WRITE(_3D_CHICKEN2,
-                  _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
-                  _3D_CHICKEN2_WM_READ_PIPELINED);
+       /* RPS code assumes GPLL is used */
+       WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
-       /* WaDisableRenderCachePipelinedFlush:ilk */
-       I915_WRITE(CACHE_MODE_0,
-                  _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
+       DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no");
+       DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
 
-       /* WaDisable_RenderCache_OperationalFlush:ilk */
-       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+       dev_priv->rps.cur_freq = (val >> 8) & 0xff;
+       DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
+                        dev_priv->rps.cur_freq);
 
-       g4x_disable_trickle_feed(dev);
+       DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n",
+                        vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq),
+                        dev_priv->rps.efficient_freq);
 
-       ibx_init_clock_gating(dev);
+       valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq);
+
+       gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static void cpt_init_clock_gating(struct drm_device *dev)
+void ironlake_teardown_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int pipe;
-       uint32_t val;
 
-       /*
-        * On Ibex Peak and Cougar Point, we need to disable clock
-        * gating for the panel power sequencer or it will fail to
-        * start up when no ports are active.
-        */
-       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
-                  PCH_DPLUNIT_CLOCK_GATE_DISABLE |
-                  PCH_CPUNIT_CLOCK_GATE_DISABLE);
-       I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
-                  DPLS_EDP_PPS_FIX_DIS);
-       /* The below fixes the weird display corruption, a few pixels shifted
-        * downward, on (only) LVDS of some HP laptops with IVY.
-        */
-       for_each_pipe(dev_priv, pipe) {
-               val = I915_READ(TRANS_CHICKEN2(pipe));
-               val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
-               val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-               if (dev_priv->vbt.fdi_rx_polarity_inverted)
-                       val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
-               val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
-               val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
-               val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
-               I915_WRITE(TRANS_CHICKEN2(pipe), val);
+       if (dev_priv->ips.renderctx) {
+               i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
+               drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
+               dev_priv->ips.renderctx = NULL;
        }
-       /* WADP0ClockGatingDisable */
-       for_each_pipe(dev_priv, pipe) {
-               I915_WRITE(TRANS_CHICKEN1(pipe),
-                          TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+
+       if (dev_priv->ips.pwrctx) {
+               i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
+               drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
+               dev_priv->ips.pwrctx = NULL;
        }
 }
 
-static void gen6_check_mch_setup(struct drm_device *dev)
+static void ironlake_disable_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t tmp;
 
-       tmp = I915_READ(MCH_SSKPD);
-       if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
-               DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
-                             tmp);
+       if (I915_READ(PWRCTXA)) {
+               /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */
+               I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT);
+               wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON),
+                        50);
+
+               I915_WRITE(PWRCTXA, 0);
+               POSTING_READ(PWRCTXA);
+
+               I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
+               POSTING_READ(RSTDBYCTL);
+       }
 }
 
-static void gen6_init_clock_gating(struct drm_device *dev)
+static int ironlake_setup_rc6(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-       I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
+       if (dev_priv->ips.renderctx == NULL)
+               dev_priv->ips.renderctx = intel_alloc_context_page(dev);
+       if (!dev_priv->ips.renderctx)
+               return -ENOMEM;
 
-       I915_WRITE(ILK_DISPLAY_CHICKEN2,
-                  I915_READ(ILK_DISPLAY_CHICKEN2) |
-                  ILK_ELPIN_409_SELECT);
+       if (dev_priv->ips.pwrctx == NULL)
+               dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
+       if (!dev_priv->ips.pwrctx) {
+               ironlake_teardown_rc6(dev);
+               return -ENOMEM;
+       }
 
-       /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
-       I915_WRITE(_3D_CHICKEN,
-                  _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
+       return 0;
+}
 
-       /* WaDisable_RenderCache_OperationalFlush:snb */
-       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+static void ironlake_enable_rc6(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+       bool was_interruptible;
+       int ret;
 
-       /*
-        * BSpec recoomends 8x4 when MSAA is used,
-        * however in practice 16x4 seems fastest.
-        *
-        * Note that PS/WM thread counts depend on the WIZ hashing
-        * disable bit, which we don't touch here, but it's good
-        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+       /* rc6 disabled by default due to repeated reports of hanging during
+        * boot and resume.
         */
-       I915_WRITE(GEN6_GT_MODE,
-                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
-       ilk_init_lp_watermarks(dev);
-
-       I915_WRITE(CACHE_MODE_0,
-                  _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
-
-       I915_WRITE(GEN6_UCGCTL1,
-                  I915_READ(GEN6_UCGCTL1) |
-                  GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
-                  GEN6_CSUNIT_CLOCK_GATE_DISABLE);
+       if (!intel_enable_rc6(dev))
+               return;
 
-       /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
-        * gating disable must be set.  Failure to set it results in
-        * flickering pixels due to Z write ordering failures after
-        * some amount of runtime in the Mesa "fire" demo, and Unigine
-        * Sanctuary and Tropics, and apparently anything else with
-        * alpha test or pixel discard.
-        *
-        * According to the spec, bit 11 (RCCUNIT) must also be set,
-        * but we didn't debug actual testcases to find it out.
-        *
-        * WaDisableRCCUnitClockGating:snb
-        * WaDisableRCPBUnitClockGating:snb
-        */
-       I915_WRITE(GEN6_UCGCTL2,
-                  GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
-                  GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
+       WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-       /* WaStripsFansDisableFastClipPerformanceFix:snb */
-       I915_WRITE(_3D_CHICKEN3,
-                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
+       ret = ironlake_setup_rc6(dev);
+       if (ret)
+               return;
 
-       /*
-        * Bspec says:
-        * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
-        * 3DSTATE_SF number of SF output attributes is more than 16."
-        */
-       I915_WRITE(_3D_CHICKEN3,
-                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
+       was_interruptible = dev_priv->mm.interruptible;
+       dev_priv->mm.interruptible = false;
 
        /*
-        * According to the spec the following bits should be
-        * set in order to enable memory self-refresh and fbc:
-        * The bit21 and bit22 of 0x42000
-        * The bit21 and bit22 of 0x42004
-        * The bit5 and bit7 of 0x42020
-        * The bit14 of 0x70180
-        * The bit14 of 0x71180
-        *
-        * WaFbcAsynchFlipDisableFbcQueue:snb
+        * GPU can automatically power down the render unit if given a page
+        * to save state.
         */
-       I915_WRITE(ILK_DISPLAY_CHICKEN1,
-                  I915_READ(ILK_DISPLAY_CHICKEN1) |
-                  ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
-       I915_WRITE(ILK_DISPLAY_CHICKEN2,
-                  I915_READ(ILK_DISPLAY_CHICKEN2) |
-                  ILK_DPARB_GATE | ILK_VSDPFD_FULL);
-       I915_WRITE(ILK_DSPCLK_GATE_D,
-                  I915_READ(ILK_DSPCLK_GATE_D) |
-                  ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
-                  ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
-
-       g4x_disable_trickle_feed(dev);
-
-       cpt_init_clock_gating(dev);
-
-       gen6_check_mch_setup(dev);
-}
+       ret = intel_ring_begin(ring, 6);
+       if (ret) {
+               ironlake_teardown_rc6(dev);
+               dev_priv->mm.interruptible = was_interruptible;
+               return;
+       }
 
-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
-{
-       uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
+       intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
+       intel_ring_emit(ring, MI_SET_CONTEXT);
+       intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
+                       MI_MM_SPACE_GTT |
+                       MI_SAVE_EXT_STATE_EN |
+                       MI_RESTORE_EXT_STATE_EN |
+                       MI_RESTORE_INHIBIT);
+       intel_ring_emit(ring, MI_SUSPEND_FLUSH);
+       intel_ring_emit(ring, MI_NOOP);
+       intel_ring_emit(ring, MI_FLUSH);
+       intel_ring_advance(ring);
 
        /*
-        * WaVSThreadDispatchOverride:ivb,vlv
-        *
-        * This actually overrides the dispatch
-        * mode for all thread types.
+        * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
+        * does an implicit flush, combined with MI_FLUSH above, it should be
+        * safe to assume that renderctx is valid
         */
-       reg &= ~GEN7_FF_SCHED_MASK;
-       reg |= GEN7_FF_TS_SCHED_HW;
-       reg |= GEN7_FF_VS_SCHED_HW;
-       reg |= GEN7_FF_DS_SCHED_HW;
-
-       I915_WRITE(GEN7_FF_THREAD_MODE, reg);
-}
-
-static void lpt_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       ret = intel_ring_idle(ring);
+       dev_priv->mm.interruptible = was_interruptible;
+       if (ret) {
+               DRM_ERROR("failed to enable ironlake power savings\n");
+               ironlake_teardown_rc6(dev);
+               return;
+       }
 
-       /*
-        * TODO: this bit should only be enabled when really needed, then
-        * disabled when not needed anymore in order to save power.
-        */
-       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
-               I915_WRITE(SOUTH_DSPCLK_GATE_D,
-                          I915_READ(SOUTH_DSPCLK_GATE_D) |
-                          PCH_LP_PARTITION_LEVEL_DISABLE);
+       I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
+       I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 
-       /* WADPOClockGatingDisable:hsw */
-       I915_WRITE(_TRANSA_CHICKEN1,
-                  I915_READ(_TRANSA_CHICKEN1) |
-                  TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+       intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE);
 }
 
-static void lpt_suspend_hw(struct drm_device *dev)
+static unsigned long intel_pxfreq(u32 vidfreq)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       unsigned long freq;
+       int div = (vidfreq & 0x3f0000) >> 16;
+       int post = (vidfreq & 0x3000) >> 12;
+       int pre = (vidfreq & 0x7);
 
-       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
-               uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
+       if (!pre)
+               return 0;
 
-               val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
-               I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
-       }
-}
+       freq = ((div * 133333) / ((1<<post) * pre));
 
-static void broadwell_init_clock_gating(struct drm_device *dev)
-{
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       enum pipe pipe;
+       return freq;
+}
 
-       I915_WRITE(WM3_LP_ILK, 0);
-       I915_WRITE(WM2_LP_ILK, 0);
-       I915_WRITE(WM1_LP_ILK, 0);
+static const struct cparams {
+       u16 i;
+       u16 t;
+       u16 m;
+       u16 c;
+} cparams[] = {
+       { 1, 1333, 301, 28664 },
+       { 1, 1066, 294, 24460 },
+       { 1, 800, 294, 25192 },
+       { 0, 1333, 276, 27605 },
+       { 0, 1066, 276, 27605 },
+       { 0, 800, 231, 23784 },
+};
 
-       /* FIXME(BDW): Check all the w/a, some might only apply to
-        * pre-production hw. */
+static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
+{
+       u64 total_count, diff, ret;
+       u32 count1, count2, count3, m = 0, c = 0;
+       unsigned long now = jiffies_to_msecs(jiffies), diff1;
+       int i;
 
+       assert_spin_locked(&mchdev_lock);
 
-       I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
+       diff1 = now - dev_priv->ips.last_time1;
 
-       I915_WRITE(_3D_CHICKEN3,
-                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
+       /* Prevent division-by-zero if we are asking too fast.
+        * Also, we don't get interesting results if we are polling
+        * faster than once in 10ms, so just return the saved value
+        * in such cases.
+        */
+       if (diff1 <= 10)
+               return dev_priv->ips.chipset_power;
 
+       count1 = I915_READ(DMIEC);
+       count2 = I915_READ(DDREC);
+       count3 = I915_READ(CSIEC);
 
-       /* WaSwitchSolVfFArbitrationPriority:bdw */
-       I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+       total_count = count1 + count2 + count3;
 
-       /* WaPsrDPAMaskVBlankInSRD:bdw */
-       I915_WRITE(CHICKEN_PAR1_1,
-                  I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
+       /* FIXME: handle per-counter overflow */
+       if (total_count < dev_priv->ips.last_count1) {
+               diff = ~0UL - dev_priv->ips.last_count1;
+               diff += total_count;
+       } else {
+               diff = total_count - dev_priv->ips.last_count1;
+       }
 
-       /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
-       for_each_pipe(dev_priv, pipe) {
-               I915_WRITE(CHICKEN_PIPESL_1(pipe),
-                          I915_READ(CHICKEN_PIPESL_1(pipe)) |
-                          BDW_DPRS_MASK_VBLANK_SRD);
+       for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+               if (cparams[i].i == dev_priv->ips.c_m &&
+                   cparams[i].t == dev_priv->ips.r_t) {
+                       m = cparams[i].m;
+                       c = cparams[i].c;
+                       break;
+               }
        }
 
-       /* WaVSRefCountFullforceMissDisable:bdw */
-       /* WaDSRefCountFullforceMissDisable:bdw */
-       I915_WRITE(GEN7_FF_THREAD_MODE,
-                  I915_READ(GEN7_FF_THREAD_MODE) &
-                  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+       diff = div_u64(diff, diff1);
+       ret = ((m * diff) + c);
+       ret = div_u64(ret, 10);
 
-       I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
-                  _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
+       dev_priv->ips.last_count1 = total_count;
+       dev_priv->ips.last_time1 = now;
 
-       /* WaDisableSDEUnitClockGating:bdw */
-       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+       dev_priv->ips.chipset_power = ret;
 
-       lpt_init_clock_gating(dev);
+       return ret;
 }
 
-static void haswell_init_clock_gating(struct drm_device *dev)
+unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_device *dev = dev_priv->dev;
+       unsigned long val;
 
-       ilk_init_lp_watermarks(dev);
+       if (INTEL_INFO(dev)->gen != 5)
+               return 0;
 
-       /* L3 caching of data atomics doesn't work -- disable it. */
-       I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
-       I915_WRITE(HSW_ROW_CHICKEN3,
-                  _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
+       spin_lock_irq(&mchdev_lock);
 
-       /* This is required by WaCatErrorRejectionIssue:hsw */
-       I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-                       I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-                       GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+       val = __i915_chipset_val(dev_priv);
 
-       /* WaVSRefCountFullforceMissDisable:hsw */
-       I915_WRITE(GEN7_FF_THREAD_MODE,
-                  I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
+       spin_unlock_irq(&mchdev_lock);
 
-       /* WaDisable_RenderCache_OperationalFlush:hsw */
-       I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+       return val;
+}
 
-       /* enable HiZ Raw Stall Optimization */
-       I915_WRITE(CACHE_MODE_0_GEN7,
-                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
+unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
+{
+       unsigned long m, x, b;
+       u32 tsfs;
 
-       /* WaDisable4x2SubspanOptimization:hsw */
-       I915_WRITE(CACHE_MODE_1,
-                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
+       tsfs = I915_READ(TSFS);
 
-       /*
-        * BSpec recommends 8x4 when MSAA is used,
-        * however in practice 16x4 seems fastest.
-        *
-        * Note that PS/WM thread counts depend on the WIZ hashing
-        * disable bit, which we don't touch here, but it's good
-        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-        */
-       I915_WRITE(GEN7_GT_MODE,
-                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+       m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
+       x = I915_READ8(TR1);
 
-       /* WaSwitchSolVfFArbitrationPriority:hsw */
-       I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+       b = tsfs & TSFS_INTR_MASK;
 
-       /* WaRsPkgCStateDisplayPMReq:hsw */
-       I915_WRITE(CHICKEN_PAR1_1,
-                  I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
+       return ((m * x) / 127) - b;
+}
 
-       lpt_init_clock_gating(dev);
+static int _pxvid_to_vd(u8 pxvid)
+{
+       if (pxvid == 0)
+               return 0;
+
+       if (pxvid >= 8 && pxvid < 31)
+               pxvid = 31;
+
+       return (pxvid + 2) * 125;
 }
 
-static void ivybridge_init_clock_gating(struct drm_device *dev)
+static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t snpcr;
+       struct drm_device *dev = dev_priv->dev;
+       const int vd = _pxvid_to_vd(pxvid);
+       const int vm = vd - 1125;
 
-       ilk_init_lp_watermarks(dev);
+       if (INTEL_INFO(dev)->is_mobile)
+               return vm > 0 ? vm : 0;
 
-       I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
+       return vd;
+}
 
-       /* WaDisableEarlyCull:ivb */
-       I915_WRITE(_3D_CHICKEN3,
-                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
+{
+       u64 now, diff, diffms;
+       u32 count;
 
-       /* WaDisableBackToBackFlipFix:ivb */
-       I915_WRITE(IVB_CHICKEN3,
-                  CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
-                  CHICKEN3_DGMG_DONE_FIX_DISABLE);
+       assert_spin_locked(&mchdev_lock);
 
-       /* WaDisablePSDDualDispatchEnable:ivb */
-       if (IS_IVB_GT1(dev))
-               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+       now = ktime_get_raw_ns();
+       diffms = now - dev_priv->ips.last_time2;
+       do_div(diffms, NSEC_PER_MSEC);
 
-       /* WaDisable_RenderCache_OperationalFlush:ivb */
-       I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+       /* Don't divide by 0 */
+       if (!diffms)
+               return;
 
-       /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
-       I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
-                  GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
+       count = I915_READ(GFXEC);
 
-       /* WaApplyL3ControlAndL3ChickenMode:ivb */
-       I915_WRITE(GEN7_L3CNTLREG1,
-                       GEN7_WA_FOR_GEN7_L3_CONTROL);
-       I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
-                  GEN7_WA_L3_CHICKEN_MODE);
-       if (IS_IVB_GT1(dev))
-               I915_WRITE(GEN7_ROW_CHICKEN2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-       else {
-               /* must write both registers */
-               I915_WRITE(GEN7_ROW_CHICKEN2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
-               I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       if (count < dev_priv->ips.last_count2) {
+               diff = ~0UL - dev_priv->ips.last_count2;
+               diff += count;
+       } else {
+               diff = count - dev_priv->ips.last_count2;
        }
 
-       /* WaForceL3Serialization:ivb */
-       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
-                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+       dev_priv->ips.last_count2 = count;
+       dev_priv->ips.last_time2 = now;
 
-       /*
-        * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
-        * This implements the WaDisableRCZUnitClockGating:ivb workaround.
-        */
-       I915_WRITE(GEN6_UCGCTL2,
-                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
+       /* More magic constants... */
+       diff = diff * 1181;
+       diff = div_u64(diff, diffms * 10);
+       dev_priv->ips.gfx_power = diff;
+}
 
-       /* This is required by WaCatErrorRejectionIssue:ivb */
-       I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-                       I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-                       GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+{
+       struct drm_device *dev = dev_priv->dev;
+
+       if (INTEL_INFO(dev)->gen != 5)
+               return;
+
+       spin_lock_irq(&mchdev_lock);
+
+       __i915_update_gfx_val(dev_priv);
+
+       spin_unlock_irq(&mchdev_lock);
+}
+
+static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
+{
+       unsigned long t, corr, state1, corr2, state2;
+       u32 pxvid, ext_v;
+
+       assert_spin_locked(&mchdev_lock);
 
-       g4x_disable_trickle_feed(dev);
+       pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4));
+       pxvid = (pxvid >> 24) & 0x7f;
+       ext_v = pvid_to_extvid(dev_priv, pxvid);
 
-       gen7_setup_fixed_func_scheduler(dev_priv);
+       state1 = ext_v;
 
-       if (0) { /* causes HiZ corruption on ivb:gt1 */
-               /* enable HiZ Raw Stall Optimization */
-               I915_WRITE(CACHE_MODE_0_GEN7,
-                          _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
-       }
+       t = i915_mch_val(dev_priv);
 
-       /* WaDisable4x2SubspanOptimization:ivb */
-       I915_WRITE(CACHE_MODE_1,
-                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
+       /* Revel in the empirically derived constants */
 
-       /*
-        * BSpec recommends 8x4 when MSAA is used,
-        * however in practice 16x4 seems fastest.
-        *
-        * Note that PS/WM thread counts depend on the WIZ hashing
-        * disable bit, which we don't touch here, but it's good
-        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-        */
-       I915_WRITE(GEN7_GT_MODE,
-                  GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+       /* Correction factor in 1/100000 units */
+       if (t > 80)
+               corr = ((t * 2349) + 135940);
+       else if (t >= 50)
+               corr = ((t * 964) + 29317);
+       else /* < 50 */
+               corr = ((t * 301) + 1004);
 
-       snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
-       snpcr &= ~GEN6_MBC_SNPCR_MASK;
-       snpcr |= GEN6_MBC_SNPCR_MED;
-       I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
+       corr = corr * ((150142 * state1) / 10000 - 78642);
+       corr /= 100000;
+       corr2 = (corr * dev_priv->ips.corr);
 
-       if (!HAS_PCH_NOP(dev))
-               cpt_init_clock_gating(dev);
+       state2 = (corr2 * state1) / 10000;
+       state2 /= 100; /* convert to mW */
 
-       gen6_check_mch_setup(dev);
+       __i915_update_gfx_val(dev_priv);
+
+       return dev_priv->ips.gfx_power + state2;
 }
 
-static void valleyview_init_clock_gating(struct drm_device *dev)
+unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_device *dev = dev_priv->dev;
+       unsigned long val;
 
-       I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
+       if (INTEL_INFO(dev)->gen != 5)
+               return 0;
 
-       /* WaDisableEarlyCull:vlv */
-       I915_WRITE(_3D_CHICKEN3,
-                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+       spin_lock_irq(&mchdev_lock);
 
-       /* WaDisableBackToBackFlipFix:vlv */
-       I915_WRITE(IVB_CHICKEN3,
-                  CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
-                  CHICKEN3_DGMG_DONE_FIX_DISABLE);
+       val = __i915_gfx_val(dev_priv);
 
-       /* WaPsdDispatchEnable:vlv */
-       /* WaDisablePSDDualDispatchEnable:vlv */
-       I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
-                  _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
-                                     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
+       spin_unlock_irq(&mchdev_lock);
 
-       /* WaDisable_RenderCache_OperationalFlush:vlv */
-       I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+       return val;
+}
 
-       /* WaForceL3Serialization:vlv */
-       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
-                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ */
+unsigned long i915_read_mch_val(void)
+{
+       struct drm_i915_private *dev_priv;
+       unsigned long chipset_val, graphics_val, ret = 0;
 
-       /* WaDisableDopClockGating:vlv */
-       I915_WRITE(GEN7_ROW_CHICKEN2,
-                  _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       spin_lock_irq(&mchdev_lock);
+       if (!i915_mch_dev)
+               goto out_unlock;
+       dev_priv = i915_mch_dev;
 
-       /* This is required by WaCatErrorRejectionIssue:vlv */
-       I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
-                  I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
-                  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+       chipset_val = __i915_chipset_val(dev_priv);
+       graphics_val = __i915_gfx_val(dev_priv);
 
-       gen7_setup_fixed_func_scheduler(dev_priv);
+       ret = chipset_val + graphics_val;
 
-       /*
-        * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
-        * This implements the WaDisableRCZUnitClockGating:vlv workaround.
-        */
-       I915_WRITE(GEN6_UCGCTL2,
-                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
+out_unlock:
+       spin_unlock_irq(&mchdev_lock);
 
-       /* WaDisableL3Bank2xClockGate:vlv
-        * Disabling L3 clock gating- MMIO 940c[25] = 1
-        * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
-       I915_WRITE(GEN7_UCGCTL4,
-                  I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
-       I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+       struct drm_i915_private *dev_priv;
+       bool ret = true;
 
-       /*
-        * BSpec says this must be set, even though
-        * WaDisable4x2SubspanOptimization isn't listed for VLV.
-        */
-       I915_WRITE(CACHE_MODE_1,
-                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
+       spin_lock_irq(&mchdev_lock);
+       if (!i915_mch_dev) {
+               ret = false;
+               goto out_unlock;
+       }
+       dev_priv = i915_mch_dev;
 
-       /*
-        * WaIncreaseL3CreditsForVLVB0:vlv
-        * This is the hardware default actually.
-        */
-       I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
+       if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
+               dev_priv->ips.max_delay--;
 
-       /*
-        * WaDisableVLVClockGating_VBIIssue:vlv
-        * Disable clock gating on th GCFG unit to prevent a delay
-        * in the reporting of vblank events.
-        */
-       I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
+out_unlock:
+       spin_unlock_irq(&mchdev_lock);
+
+       return ret;
 }
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
-static void cherryview_init_clock_gating(struct drm_device *dev)
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ */
+bool i915_gpu_lower(void)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_i915_private *dev_priv;
+       bool ret = true;
 
-       I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
+       spin_lock_irq(&mchdev_lock);
+       if (!i915_mch_dev) {
+               ret = false;
+               goto out_unlock;
+       }
+       dev_priv = i915_mch_dev;
 
-       I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
+       if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
+               dev_priv->ips.max_delay++;
 
-       /* WaVSRefCountFullforceMissDisable:chv */
-       /* WaDSRefCountFullforceMissDisable:chv */
-       I915_WRITE(GEN7_FF_THREAD_MODE,
-                  I915_READ(GEN7_FF_THREAD_MODE) &
-                  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+out_unlock:
+       spin_unlock_irq(&mchdev_lock);
 
-       /* WaDisableSemaphoreAndSyncFlipWait:chv */
-       I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
-                  _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
+       return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
-       /* WaDisableCSUnitClockGating:chv */
-       I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
-                  GEN6_CSUNIT_CLOCK_GATE_DISABLE);
+/**
+ * i915_gpu_busy - indicate GPU business to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ */
+bool i915_gpu_busy(void)
+{
+       struct drm_i915_private *dev_priv;
+       struct intel_engine_cs *ring;
+       bool ret = false;
+       int i;
 
-       /* WaDisableSDEUnitClockGating:chv */
-       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+       spin_lock_irq(&mchdev_lock);
+       if (!i915_mch_dev)
+               goto out_unlock;
+       dev_priv = i915_mch_dev;
 
-       /* WaDisableGunitClockGating:chv (pre-production hw) */
-       I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) |
-                  GINT_DIS);
+       for_each_ring(ring, dev_priv, i)
+               ret |= !list_empty(&ring->request_list);
 
-       /* WaDisableFfDopClockGating:chv (pre-production hw) */
-       I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
-                  _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE));
+out_unlock:
+       spin_unlock_irq(&mchdev_lock);
 
-       /* WaDisableDopClockGating:chv (pre-production hw) */
-       I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
-                  GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
+       return ret;
 }
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
 
-static void g4x_init_clock_gating(struct drm_device *dev)
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by resetting the max frequency and setting the
+ * current frequency to the default.
+ */
+bool i915_gpu_turbo_disable(void)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-       uint32_t dspclk_gate;
+       struct drm_i915_private *dev_priv;
+       bool ret = true;
 
-       I915_WRITE(RENCLK_GATE_D1, 0);
-       I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
-                  GS_UNIT_CLOCK_GATE_DISABLE |
-                  CL_UNIT_CLOCK_GATE_DISABLE);
-       I915_WRITE(RAMCLK_GATE_D, 0);
-       dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
-               OVRUNIT_CLOCK_GATE_DISABLE |
-               OVCUNIT_CLOCK_GATE_DISABLE;
-       if (IS_GM45(dev))
-               dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
-       I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
+       spin_lock_irq(&mchdev_lock);
+       if (!i915_mch_dev) {
+               ret = false;
+               goto out_unlock;
+       }
+       dev_priv = i915_mch_dev;
 
-       /* WaDisableRenderCachePipelinedFlush */
-       I915_WRITE(CACHE_MODE_0,
-                  _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
+       dev_priv->ips.max_delay = dev_priv->ips.fstart;
+
+       if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
+               ret = false;
 
-       /* WaDisable_RenderCache_OperationalFlush:g4x */
-       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+out_unlock:
+       spin_unlock_irq(&mchdev_lock);
 
-       g4x_disable_trickle_feed(dev);
+       return ret;
 }
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
-static void crestline_init_clock_gating(struct drm_device *dev)
+/**
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
-       I915_WRITE(RENCLK_GATE_D2, 0);
-       I915_WRITE(DSPCLK_GATE_D, 0);
-       I915_WRITE(RAMCLK_GATE_D, 0);
-       I915_WRITE16(DEUC, 0);
-       I915_WRITE(MI_ARB_STATE,
-                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+       void (*link)(void);
 
-       /* WaDisable_RenderCache_OperationalFlush:gen4 */
-       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+       link = symbol_get(ips_link_to_i915_driver);
+       if (link) {
+               link();
+               symbol_put(ips_link_to_i915_driver);
+       }
 }
 
-static void broadwater_init_clock_gating(struct drm_device *dev)
+void intel_gpu_ips_init(struct drm_i915_private *dev_priv)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       /* We only register the i915 ips part with intel-ips once everything is
+        * set up, to avoid intel-ips sneaking in and reading bogus values. */
+       spin_lock_irq(&mchdev_lock);
+       i915_mch_dev = dev_priv;
+       spin_unlock_irq(&mchdev_lock);
 
-       I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
-                  I965_RCC_CLOCK_GATE_DISABLE |
-                  I965_RCPB_CLOCK_GATE_DISABLE |
-                  I965_ISC_CLOCK_GATE_DISABLE |
-                  I965_FBC_CLOCK_GATE_DISABLE);
-       I915_WRITE(RENCLK_GATE_D2, 0);
-       I915_WRITE(MI_ARB_STATE,
-                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+       ips_ping_for_i915_load();
+}
 
-       /* WaDisable_RenderCache_OperationalFlush:gen4 */
-       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+void intel_gpu_ips_teardown(void)
+{
+       spin_lock_irq(&mchdev_lock);
+       i915_mch_dev = NULL;
+       spin_unlock_irq(&mchdev_lock);
 }
 
-static void gen3_init_clock_gating(struct drm_device *dev)
+static void intel_init_emon(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 dstate = I915_READ(D_STATE);
+       u32 lcfuse;
+       u8 pxw[16];
+       int i;
 
-       dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
-               DSTATE_DOT_CLOCK_GATING;
-       I915_WRITE(D_STATE, dstate);
+       /* Disable to program */
+       I915_WRITE(ECR, 0);
+       POSTING_READ(ECR);
 
-       if (IS_PINEVIEW(dev))
-               I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
+       /* Program energy weights for various events */
+       I915_WRITE(SDEW, 0x15040d00);
+       I915_WRITE(CSIEW0, 0x007f0000);
+       I915_WRITE(CSIEW1, 0x1e220004);
+       I915_WRITE(CSIEW2, 0x04000004);
 
-       /* IIR "flip pending" means done if this bit is set */
-       I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
+       for (i = 0; i < 5; i++)
+               I915_WRITE(PEW + (i * 4), 0);
+       for (i = 0; i < 3; i++)
+               I915_WRITE(DEW + (i * 4), 0);
 
-       /* interrupts should cause a wake up from C3 */
-       I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
+       /* Program P-state weights to account for frequency power adjustment */
+       for (i = 0; i < 16; i++) {
+               u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4));
+               unsigned long freq = intel_pxfreq(pxvidfreq);
+               unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
+                       PXVFREQ_PX_SHIFT;
+               unsigned long val;
 
-       /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
-       I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
+               val = vid * vid;
+               val *= (freq / 1000);
+               val *= 255;
+               val /= (127*127*900);
+               if (val > 0xff)
+                       DRM_ERROR("bad pxval: %ld\n", val);
+               pxw[i] = val;
+       }
+       /* Render standby states get 0 weight */
+       pxw[14] = 0;
+       pxw[15] = 0;
 
-       I915_WRITE(MI_ARB_STATE,
-                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+       for (i = 0; i < 4; i++) {
+               u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
+                       (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
+               I915_WRITE(PXW + (i * 4), val);
+       }
+
+       /* Adjust magic regs to magic values (more experimental results) */
+       I915_WRITE(OGW0, 0);
+       I915_WRITE(OGW1, 0);
+       I915_WRITE(EG0, 0x00007f00);
+       I915_WRITE(EG1, 0x0000000e);
+       I915_WRITE(EG2, 0x000e0000);
+       I915_WRITE(EG3, 0x68000300);
+       I915_WRITE(EG4, 0x42000000);
+       I915_WRITE(EG5, 0x00140031);
+       I915_WRITE(EG6, 0);
+       I915_WRITE(EG7, 0);
+
+       for (i = 0; i < 8; i++)
+               I915_WRITE(PXWL + (i * 4), 0);
+
+       /* Enable PMON + select events */
+       I915_WRITE(ECR, 0x80000019);
+
+       lcfuse = I915_READ(LCFUSE02);
+
+       dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
-static void i85x_init_clock_gating(struct drm_device *dev)
+void intel_init_gt_powersave(struct drm_device *dev)
 {
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
+       i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
 
-       /* interrupts should cause a wake up from C3 */
-       I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
-                  _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
+       if (IS_CHERRYVIEW(dev))
+               cherryview_init_gt_powersave(dev);
+       else if (IS_VALLEYVIEW(dev))
+               valleyview_init_gt_powersave(dev);
+}
 
-       I915_WRITE(MEM_MODE,
-                  _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
+void intel_cleanup_gt_powersave(struct drm_device *dev)
+{
+       if (IS_CHERRYVIEW(dev))
+               return;
+       else if (IS_VALLEYVIEW(dev))
+               valleyview_cleanup_gt_powersave(dev);
 }
 
-static void i830_init_clock_gating(struct drm_device *dev)
+static void gen6_suspend_rps(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
+       flush_delayed_work(&dev_priv->rps.delayed_resume_work);
 
-       I915_WRITE(MEM_MODE,
-                  _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
-                  _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
+       /*
+        * TODO: disable RPS interrupts on GEN9+ too once RPS support
+        * is added for it.
+        */
+       if (INTEL_INFO(dev)->gen < 9)
+               gen6_disable_rps_interrupts(dev);
 }
 
-void intel_init_clock_gating(struct drm_device *dev)
+/**
+ * intel_suspend_gt_powersave - suspend PM work and helper threads
+ * @dev: drm device
+ *
+ * We don't want to disable RC6 or other features here, we just want
+ * to make sure any work we've queued has finished and won't bother
+ * us while we're suspended.
+ */
+void intel_suspend_gt_powersave(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       dev_priv->display.init_clock_gating(dev);
+       if (INTEL_INFO(dev)->gen < 6)
+               return;
+
+       gen6_suspend_rps(dev);
+
+       /* Force GPU to min freq during suspend */
+       gen6_rps_idle(dev_priv);
 }
 
-void intel_suspend_hw(struct drm_device *dev)
+void intel_disable_gt_powersave(struct drm_device *dev)
 {
-       if (HAS_PCH_LPT(dev))
-               lpt_suspend_hw(dev);
-}
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-#define for_each_power_well(i, power_well, domain_mask, power_domains) \
-       for (i = 0;                                                     \
-            i < (power_domains)->power_well_count &&                   \
-                ((power_well) = &(power_domains)->power_wells[i]);     \
-            i++)                                                       \
-               if ((power_well)->domains & (domain_mask))
+       if (IS_IRONLAKE_M(dev)) {
+               ironlake_disable_drps(dev);
+               ironlake_disable_rc6(dev);
+       } else if (INTEL_INFO(dev)->gen >= 6) {
+               intel_suspend_gt_powersave(dev);
 
-#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \
-       for (i = (power_domains)->power_well_count - 1;                  \
-            i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\
-            i--)                                                        \
-               if ((power_well)->domains & (domain_mask))
+               mutex_lock(&dev_priv->rps.hw_lock);
+               if (INTEL_INFO(dev)->gen >= 9)
+                       gen9_disable_rps(dev);
+               else if (IS_CHERRYVIEW(dev))
+                       cherryview_disable_rps(dev);
+               else if (IS_VALLEYVIEW(dev))
+                       valleyview_disable_rps(dev);
+               else
+                       gen6_disable_rps(dev);
 
-/**
- * We should only use the power well if we explicitly asked the hardware to
- * enable it, so check if it's enabled and also check if we've requested it to
- * be enabled.
- */
-static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
-{
-       return I915_READ(HSW_PWR_WELL_DRIVER) ==
-                    (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED);
+               dev_priv->rps.enabled = false;
+               mutex_unlock(&dev_priv->rps.hw_lock);
+       }
 }
 
-bool intel_display_power_enabled_unlocked(struct drm_i915_private *dev_priv,
-                                         enum intel_display_power_domain domain)
+static void intel_gen6_powersave_work(struct work_struct *work)
 {
-       struct i915_power_domains *power_domains;
-       struct i915_power_well *power_well;
-       bool is_enabled;
-       int i;
+       struct drm_i915_private *dev_priv =
+               container_of(work, struct drm_i915_private,
+                            rps.delayed_resume_work.work);
+       struct drm_device *dev = dev_priv->dev;
 
-       if (dev_priv->pm.suspended)
-               return false;
+       mutex_lock(&dev_priv->rps.hw_lock);
+
+       /*
+        * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is
+        * added for it.
+        */
+       if (INTEL_INFO(dev)->gen < 9)
+               gen6_reset_rps_interrupts(dev);
+
+       if (IS_CHERRYVIEW(dev)) {
+               cherryview_enable_rps(dev);
+       } else if (IS_VALLEYVIEW(dev)) {
+               valleyview_enable_rps(dev);
+       } else if (INTEL_INFO(dev)->gen >= 9) {
+               gen9_enable_rps(dev);
+       } else if (IS_BROADWELL(dev)) {
+               gen8_enable_rps(dev);
+               __gen6_update_ring_freq(dev);
+       } else {
+               gen6_enable_rps(dev);
+               __gen6_update_ring_freq(dev);
+       }
+       dev_priv->rps.enabled = true;
 
-       power_domains = &dev_priv->power_domains;
+       if (INTEL_INFO(dev)->gen < 9)
+               gen6_enable_rps_interrupts(dev);
 
-       is_enabled = true;
+       mutex_unlock(&dev_priv->rps.hw_lock);
 
-       for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
-               if (power_well->always_on)
-                       continue;
+       intel_runtime_pm_put(dev_priv);
+}
 
-               if (!power_well->hw_enabled) {
-                       is_enabled = false;
-                       break;
-               }
+void intel_enable_gt_powersave(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       if (IS_IRONLAKE_M(dev)) {
+               mutex_lock(&dev->struct_mutex);
+               ironlake_enable_drps(dev);
+               ironlake_enable_rc6(dev);
+               intel_init_emon(dev);
+               mutex_unlock(&dev->struct_mutex);
+       } else if (INTEL_INFO(dev)->gen >= 6) {
+               /*
+                * PCU communication is slow and this doesn't need to be
+                * done at any specific time, so do this out of our fast path
+                * to make resume and init faster.
+                *
+                * We depend on the HW RC6 power context save/restore
+                * mechanism when entering D3 through runtime PM suspend. So
+                * disable RPM until RPS/RC6 is properly setup. We can only
+                * get here via the driver load/system resume/runtime resume
+                * paths, so the _noresume version is enough (and in case of
+                * runtime resume it's necessary).
+                */
+               if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work,
+                                          round_jiffies_up_relative(HZ)))
+                       intel_runtime_pm_get_noresume(dev_priv);
        }
-
-       return is_enabled;
 }
 
-bool intel_display_power_enabled(struct drm_i915_private *dev_priv,
-                                enum intel_display_power_domain domain)
+void intel_reset_gt_powersave(struct drm_device *dev)
 {
-       struct i915_power_domains *power_domains;
-       bool ret;
-
-       power_domains = &dev_priv->power_domains;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       mutex_lock(&power_domains->lock);
-       ret = intel_display_power_enabled_unlocked(dev_priv, domain);
-       mutex_unlock(&power_domains->lock);
+       if (INTEL_INFO(dev)->gen < 6)
+               return;
 
-       return ret;
+       gen6_suspend_rps(dev);
+       dev_priv->rps.enabled = false;
 }
 
-/*
- * Starting with Haswell, we have a "Power Down Well" that can be turned off
- * when not needed anymore. We have 4 registers that can request the power well
- * to be enabled, and it will only be disabled if none of the registers is
- * requesting it to be enabled.
- */
-static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv)
+static void ibx_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
        /*
-        * After we re-enable the power well, if we touch VGA register 0x3d5
-        * we'll get unclaimed register interrupts. This stops after we write
-        * anything to the VGA MSR register. The vgacon module uses this
-        * register all the time, so if we unbind our driver and, as a
-        * consequence, bind vgacon, we'll get stuck in an infinite loop at
-        * console_unlock(). So make here we touch the VGA MSR register, making
-        * sure vgacon can keep working normally without triggering interrupts
-        * and error messages.
+        * On Ibex Peak and Cougar Point, we need to disable clock
+        * gating for the panel power sequencer or it will fail to
+        * start up when no ports are active.
         */
-       vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO);
-       outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
-       vga_put(dev->pdev, VGA_RSRC_LEGACY_IO);
-
-       if (IS_BROADWELL(dev))
-               gen8_irq_power_well_post_enable(dev_priv);
+       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
 }
 
-static void hsw_set_power_well(struct drm_i915_private *dev_priv,
-                              struct i915_power_well *power_well, bool enable)
+static void g4x_disable_trickle_feed(struct drm_device *dev)
 {
-       bool is_enabled, enable_requested;
-       uint32_t tmp;
-
-       tmp = I915_READ(HSW_PWR_WELL_DRIVER);
-       is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;
-       enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;
-
-       if (enable) {
-               if (!enable_requested)
-                       I915_WRITE(HSW_PWR_WELL_DRIVER,
-                                  HSW_PWR_WELL_ENABLE_REQUEST);
-
-               if (!is_enabled) {
-                       DRM_DEBUG_KMS("Enabling power well\n");
-                       if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
-                                     HSW_PWR_WELL_STATE_ENABLED), 20))
-                               DRM_ERROR("Timeout enabling power well\n");
-               }
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int pipe;
 
-               hsw_power_well_post_enable(dev_priv);
-       } else {
-               if (enable_requested) {
-                       I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
-                       POSTING_READ(HSW_PWR_WELL_DRIVER);
-                       DRM_DEBUG_KMS("Requesting to disable the power well\n");
-               }
+       for_each_pipe(dev_priv, pipe) {
+               I915_WRITE(DSPCNTR(pipe),
+                          I915_READ(DSPCNTR(pipe)) |
+                          DISPPLANE_TRICKLE_FEED_DISABLE);
+               intel_flush_primary_plane(dev_priv, pipe);
        }
 }
 
-static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
+static void ilk_init_lp_watermarks(struct drm_device *dev)
 {
-       hsw_set_power_well(dev_priv, power_well, power_well->count > 0);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN);
+       I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN);
+       I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN);
 
        /*
-        * We're taking over the BIOS, so clear any requests made by it since
-        * the driver is in charge now.
+        * Don't touch WM1S_LP_EN here.
+        * Doing so could cause underruns.
         */
-       if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
-               I915_WRITE(HSW_PWR_WELL_BIOS, 0);
-}
-
-static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
-                                 struct i915_power_well *power_well)
-{
-       hsw_set_power_well(dev_priv, power_well, true);
-}
-
-static void hsw_power_well_disable(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
-{
-       hsw_set_power_well(dev_priv, power_well, false);
 }
 
-static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv,
-                                          struct i915_power_well *power_well)
-{
-}
-
-static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv,
-                                            struct i915_power_well *power_well)
-{
-       return true;
-}
-
-static void vlv_set_power_well(struct drm_i915_private *dev_priv,
-                              struct i915_power_well *power_well, bool enable)
+static void ironlake_init_clock_gating(struct drm_device *dev)
 {
-       enum punit_power_well power_well_id = power_well->data;
-       u32 mask;
-       u32 state;
-       u32 ctrl;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-       mask = PUNIT_PWRGT_MASK(power_well_id);
-       state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) :
-                        PUNIT_PWRGT_PWR_GATE(power_well_id);
+       /*
+        * Required for FBC
+        * WaFbcDisableDpfcClockGating:ilk
+        */
+       dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
+                  ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
+                  ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
 
-       mutex_lock(&dev_priv->rps.hw_lock);
+       I915_WRITE(PCH_3DCGDIS0,
+                  MARIUNIT_CLOCK_GATE_DISABLE |
+                  SVSMUNIT_CLOCK_GATE_DISABLE);
+       I915_WRITE(PCH_3DCGDIS1,
+                  VFMUNIT_CLOCK_GATE_DISABLE);
 
-#define COND \
-       ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
+       /*
+        * According to the spec the following bits should be set in
+        * order to enable memory self-refresh
+        * The bit 22/21 of 0x42004
+        * The bit 5 of 0x42020
+        * The bit 15 of 0x45000
+        */
+       I915_WRITE(ILK_DISPLAY_CHICKEN2,
+                  (I915_READ(ILK_DISPLAY_CHICKEN2) |
+                   ILK_DPARB_GATE | ILK_VSDPFD_FULL));
+       dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE;
+       I915_WRITE(DISP_ARB_CTL,
+                  (I915_READ(DISP_ARB_CTL) |
+                   DISP_FBC_WM_DIS));
 
-       if (COND)
-               goto out;
+       ilk_init_lp_watermarks(dev);
 
-       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL);
-       ctrl &= ~mask;
-       ctrl |= state;
-       vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
+       /*
+        * Based on the document from hardware guys the following bits
+        * should be set unconditionally in order to enable FBC.
+        * The bit 22 of 0x42000
+        * The bit 22 of 0x42004
+        * The bit 7,8,9 of 0x42020.
+        */
+       if (IS_IRONLAKE_M(dev)) {
+               /* WaFbcAsynchFlipDisableFbcQueue:ilk */
+               I915_WRITE(ILK_DISPLAY_CHICKEN1,
+                          I915_READ(ILK_DISPLAY_CHICKEN1) |
+                          ILK_FBCQ_DIS);
+               I915_WRITE(ILK_DISPLAY_CHICKEN2,
+                          I915_READ(ILK_DISPLAY_CHICKEN2) |
+                          ILK_DPARB_GATE);
+       }
 
-       if (wait_for(COND, 100))
-               DRM_ERROR("timout setting power well state %08x (%08x)\n",
-                         state,
-                         vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
+       I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
-#undef COND
+       I915_WRITE(ILK_DISPLAY_CHICKEN2,
+                  I915_READ(ILK_DISPLAY_CHICKEN2) |
+                  ILK_ELPIN_409_SELECT);
+       I915_WRITE(_3D_CHICKEN2,
+                  _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
+                  _3D_CHICKEN2_WM_READ_PIPELINED);
 
-out:
-       mutex_unlock(&dev_priv->rps.hw_lock);
-}
+       /* WaDisableRenderCachePipelinedFlush:ilk */
+       I915_WRITE(CACHE_MODE_0,
+                  _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
-static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
-{
-       vlv_set_power_well(dev_priv, power_well, power_well->count > 0);
-}
+       /* WaDisable_RenderCache_OperationalFlush:ilk */
+       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
-                                 struct i915_power_well *power_well)
-{
-       vlv_set_power_well(dev_priv, power_well, true);
-}
+       g4x_disable_trickle_feed(dev);
 
-static void vlv_power_well_disable(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
-{
-       vlv_set_power_well(dev_priv, power_well, false);
+       ibx_init_clock_gating(dev);
 }
 
-static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
+static void cpt_init_clock_gating(struct drm_device *dev)
 {
-       int power_well_id = power_well->data;
-       bool enabled = false;
-       u32 mask;
-       u32 state;
-       u32 ctrl;
-
-       mask = PUNIT_PWRGT_MASK(power_well_id);
-       ctrl = PUNIT_PWRGT_PWR_ON(power_well_id);
-
-       mutex_lock(&dev_priv->rps.hw_lock);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int pipe;
+       uint32_t val;
 
-       state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
        /*
-        * We only ever set the power-on and power-gate states, anything
-        * else is unexpected.
+        * On Ibex Peak and Cougar Point, we need to disable clock
+        * gating for the panel power sequencer or it will fail to
+        * start up when no ports are active.
         */
-       WARN_ON(state != PUNIT_PWRGT_PWR_ON(power_well_id) &&
-               state != PUNIT_PWRGT_PWR_GATE(power_well_id));
-       if (state == ctrl)
-               enabled = true;
-
-       /*
-        * A transient state at this point would mean some unexpected party
-        * is poking at the power controls too.
+       I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
+                  PCH_DPLUNIT_CLOCK_GATE_DISABLE |
+                  PCH_CPUNIT_CLOCK_GATE_DISABLE);
+       I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
+                  DPLS_EDP_PPS_FIX_DIS);
+       /* The below fixes the weird display corruption, a few pixels shifted
+        * downward, on (only) LVDS of some HP laptops with IVY.
         */
-       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
-       WARN_ON(ctrl != state);
+       for_each_pipe(dev_priv, pipe) {
+               val = I915_READ(TRANS_CHICKEN2(pipe));
+               val |= TRANS_CHICKEN2_TIMING_OVERRIDE;
+               val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+               if (dev_priv->vbt.fdi_rx_polarity_inverted)
+                       val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED;
+               val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK;
+               val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER;
+               val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH;
+               I915_WRITE(TRANS_CHICKEN2(pipe), val);
+       }
+       /* WADP0ClockGatingDisable */
+       for_each_pipe(dev_priv, pipe) {
+               I915_WRITE(TRANS_CHICKEN1(pipe),
+                          TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+       }
+}
 
-       mutex_unlock(&dev_priv->rps.hw_lock);
+static void gen6_check_mch_setup(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t tmp;
 
-       return enabled;
+       tmp = I915_READ(MCH_SSKPD);
+       if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
+               DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
+                             tmp);
 }
 
-static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv,
-                                         struct i915_power_well *power_well)
+static void gen6_init_clock_gating(struct drm_device *dev)
 {
-       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-       vlv_set_power_well(dev_priv, power_well, true);
+       I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate);
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       valleyview_enable_display_irqs(dev_priv);
-       spin_unlock_irq(&dev_priv->irq_lock);
+       I915_WRITE(ILK_DISPLAY_CHICKEN2,
+                  I915_READ(ILK_DISPLAY_CHICKEN2) |
+                  ILK_ELPIN_409_SELECT);
 
-       /*
-        * During driver initialization/resume we can avoid restoring the
-        * part of the HW/SW state that will be inited anyway explicitly.
-        */
-       if (dev_priv->power_domains.initializing)
-               return;
+       /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
+       I915_WRITE(_3D_CHICKEN,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
 
-       intel_hpd_init(dev_priv->dev);
+       /* WaDisable_RenderCache_OperationalFlush:snb */
+       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-       i915_redisable_vga_power_on(dev_priv->dev);
-}
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN6_GT_MODE,
+                  _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
 
-static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv,
-                                          struct i915_power_well *power_well)
-{
-       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D);
+       ilk_init_lp_watermarks(dev);
 
-       spin_lock_irq(&dev_priv->irq_lock);
-       valleyview_disable_display_irqs(dev_priv);
-       spin_unlock_irq(&dev_priv->irq_lock);
+       I915_WRITE(CACHE_MODE_0,
+                  _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
 
-       vlv_set_power_well(dev_priv, power_well, false);
+       I915_WRITE(GEN6_UCGCTL1,
+                  I915_READ(GEN6_UCGCTL1) |
+                  GEN6_BLBUNIT_CLOCK_GATE_DISABLE |
+                  GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-       vlv_power_sequencer_reset(dev_priv);
-}
+       /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
+        * gating disable must be set.  Failure to set it results in
+        * flickering pixels due to Z write ordering failures after
+        * some amount of runtime in the Mesa "fire" demo, and Unigine
+        * Sanctuary and Tropics, and apparently anything else with
+        * alpha test or pixel discard.
+        *
+        * According to the spec, bit 11 (RCCUNIT) must also be set,
+        * but we didn't debug actual testcases to find it out.
+        *
+        * WaDisableRCCUnitClockGating:snb
+        * WaDisableRCPBUnitClockGating:snb
+        */
+       I915_WRITE(GEN6_UCGCTL2,
+                  GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
+                  GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
-                                          struct i915_power_well *power_well)
-{
-       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC);
+       /* WaStripsFansDisableFastClipPerformanceFix:snb */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL));
 
        /*
-        * Enable the CRI clock source so we can get at the
-        * display and the reference clock for VGA
-        * hotplug / manual detection.
+        * Bspec says:
+        * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and
+        * 3DSTATE_SF number of SF output attributes is more than 16."
         */
-       I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
-                  DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
-       udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
-
-       vlv_set_power_well(dev_priv, power_well, true);
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH));
 
        /*
-        * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
-        *  6.  De-assert cmn_reset/side_reset. Same as VLV X0.
-        *   a. GUnit 0x2110 bit[0] set to 1 (def 0)
-        *   b. The other bits such as sfr settings / modesel may all
-        *      be set to 0.
+        * According to the spec the following bits should be
+        * set in order to enable memory self-refresh and fbc:
+        * The bit21 and bit22 of 0x42000
+        * The bit21 and bit22 of 0x42004
+        * The bit5 and bit7 of 0x42020
+        * The bit14 of 0x70180
+        * The bit14 of 0x71180
         *
-        * This should only be done on init and resume from S3 with
-        * both PLLs disabled, or we risk losing DPIO and PLL
-        * synchronization.
+        * WaFbcAsynchFlipDisableFbcQueue:snb
         */
-       I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
-}
-
-static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
-                                           struct i915_power_well *power_well)
-{
-       enum pipe pipe;
-
-       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC);
+       I915_WRITE(ILK_DISPLAY_CHICKEN1,
+                  I915_READ(ILK_DISPLAY_CHICKEN1) |
+                  ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
+       I915_WRITE(ILK_DISPLAY_CHICKEN2,
+                  I915_READ(ILK_DISPLAY_CHICKEN2) |
+                  ILK_DPARB_GATE | ILK_VSDPFD_FULL);
+       I915_WRITE(ILK_DSPCLK_GATE_D,
+                  I915_READ(ILK_DSPCLK_GATE_D) |
+                  ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
+                  ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
 
-       for_each_pipe(dev_priv, pipe)
-               assert_pll_disabled(dev_priv, pipe);
+       g4x_disable_trickle_feed(dev);
 
-       /* Assert common reset */
-       I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) & ~DPIO_CMNRST);
+       cpt_init_clock_gating(dev);
 
-       vlv_set_power_well(dev_priv, power_well, false);
+       gen6_check_mch_setup(dev);
 }
 
-static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
-                                          struct i915_power_well *power_well)
+static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
 {
-       enum dpio_phy phy;
-
-       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC &&
-                    power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D);
+       uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
 
        /*
-        * Enable the CRI clock source so we can get at the
-        * display and the reference clock for VGA
-        * hotplug / manual detection.
+        * WaVSThreadDispatchOverride:ivb,vlv
+        *
+        * This actually overrides the dispatch
+        * mode for all thread types.
         */
-       if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) {
-               phy = DPIO_PHY0;
-               I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
-                          DPLL_REFA_CLK_ENABLE_VLV);
-               I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
-                          DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
-       } else {
-               phy = DPIO_PHY1;
-               I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) |
-                          DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
-       }
-       udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
-       vlv_set_power_well(dev_priv, power_well, true);
-
-       /* Poll for phypwrgood signal */
-       if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1))
-               DRM_ERROR("Display PHY %d is not power up\n", phy);
+       reg &= ~GEN7_FF_SCHED_MASK;
+       reg |= GEN7_FF_TS_SCHED_HW;
+       reg |= GEN7_FF_VS_SCHED_HW;
+       reg |= GEN7_FF_DS_SCHED_HW;
 
-       I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) |
-                  PHY_COM_LANE_RESET_DEASSERT(phy));
+       I915_WRITE(GEN7_FF_THREAD_MODE, reg);
 }
 
-static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
-                                           struct i915_power_well *power_well)
+static void lpt_init_clock_gating(struct drm_device *dev)
 {
-       enum dpio_phy phy;
-
-       WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC &&
-                    power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D);
-
-       if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) {
-               phy = DPIO_PHY0;
-               assert_pll_disabled(dev_priv, PIPE_A);
-               assert_pll_disabled(dev_priv, PIPE_B);
-       } else {
-               phy = DPIO_PHY1;
-               assert_pll_disabled(dev_priv, PIPE_C);
-       }
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) &
-                  ~PHY_COM_LANE_RESET_DEASSERT(phy));
+       /*
+        * TODO: this bit should only be enabled when really needed, then
+        * disabled when not needed anymore in order to save power.
+        */
+       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE)
+               I915_WRITE(SOUTH_DSPCLK_GATE_D,
+                          I915_READ(SOUTH_DSPCLK_GATE_D) |
+                          PCH_LP_PARTITION_LEVEL_DISABLE);
 
-       vlv_set_power_well(dev_priv, power_well, false);
+       /* WADPOClockGatingDisable:hsw */
+       I915_WRITE(_TRANSA_CHICKEN1,
+                  I915_READ(_TRANSA_CHICKEN1) |
+                  TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
 }
 
-static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
-                                       struct i915_power_well *power_well)
+static void lpt_suspend_hw(struct drm_device *dev)
 {
-       enum pipe pipe = power_well->data;
-       bool enabled;
-       u32 state, ctrl;
-
-       mutex_lock(&dev_priv->rps.hw_lock);
-
-       state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
-       /*
-        * We only ever set the power-on and power-gate states, anything
-        * else is unexpected.
-        */
-       WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe));
-       enabled = state == DP_SSS_PWR_ON(pipe);
-
-       /*
-        * A transient state at this point would mean some unexpected party
-        * is poking at the power controls too.
-        */
-       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe);
-       WARN_ON(ctrl << 16 != state);
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
+               uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D);
 
-       return enabled;
+               val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
+               I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
+       }
 }
 
-static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
-                                   struct i915_power_well *power_well,
-                                   bool enable)
+static void broadwell_init_clock_gating(struct drm_device *dev)
 {
-       enum pipe pipe = power_well->data;
-       u32 state;
-       u32 ctrl;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       enum pipe pipe;
 
-       state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
+       I915_WRITE(WM3_LP_ILK, 0);
+       I915_WRITE(WM2_LP_ILK, 0);
+       I915_WRITE(WM1_LP_ILK, 0);
 
-       mutex_lock(&dev_priv->rps.hw_lock);
+       /* WaSwitchSolVfFArbitrationPriority:bdw */
+       I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
-#define COND \
-       ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state)
+       /* WaPsrDPAMaskVBlankInSRD:bdw */
+       I915_WRITE(CHICKEN_PAR1_1,
+                  I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
 
-       if (COND)
-               goto out;
+       /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
+       for_each_pipe(dev_priv, pipe) {
+               I915_WRITE(CHICKEN_PIPESL_1(pipe),
+                          I915_READ(CHICKEN_PIPESL_1(pipe)) |
+                          BDW_DPRS_MASK_VBLANK_SRD);
+       }
 
-       ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
-       ctrl &= ~DP_SSC_MASK(pipe);
-       ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe);
-       vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl);
+       /* WaVSRefCountFullforceMissDisable:bdw */
+       /* WaDSRefCountFullforceMissDisable:bdw */
+       I915_WRITE(GEN7_FF_THREAD_MODE,
+                  I915_READ(GEN7_FF_THREAD_MODE) &
+                  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
 
-       if (wait_for(COND, 100))
-               DRM_ERROR("timout setting power well state %08x (%08x)\n",
-                         state,
-                         vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ));
+       I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
+                  _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
-#undef COND
+       /* WaDisableSDEUnitClockGating:bdw */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
-out:
-       mutex_unlock(&dev_priv->rps.hw_lock);
+       lpt_init_clock_gating(dev);
 }
 
-static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv,
-                                       struct i915_power_well *power_well)
+static void haswell_init_clock_gating(struct drm_device *dev)
 {
-       chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0);
-}
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
-                                      struct i915_power_well *power_well)
-{
-       WARN_ON_ONCE(power_well->data != PIPE_A &&
-                    power_well->data != PIPE_B &&
-                    power_well->data != PIPE_C);
+       ilk_init_lp_watermarks(dev);
 
-       chv_set_pipe_power_well(dev_priv, power_well, true);
-}
+       /* L3 caching of data atomics doesn't work -- disable it. */
+       I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+       I915_WRITE(HSW_ROW_CHICKEN3,
+                  _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE));
 
-static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
-                                       struct i915_power_well *power_well)
-{
-       WARN_ON_ONCE(power_well->data != PIPE_A &&
-                    power_well->data != PIPE_B &&
-                    power_well->data != PIPE_C);
+       /* This is required by WaCatErrorRejectionIssue:hsw */
+       I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+                       I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+                       GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
-       chv_set_pipe_power_well(dev_priv, power_well, false);
-}
+       /* WaVSRefCountFullforceMissDisable:hsw */
+       I915_WRITE(GEN7_FF_THREAD_MODE,
+                  I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
 
-static void check_power_well_state(struct drm_i915_private *dev_priv,
-                                  struct i915_power_well *power_well)
-{
-       bool enabled = power_well->ops->is_enabled(dev_priv, power_well);
+       /* WaDisable_RenderCache_OperationalFlush:hsw */
+       I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-       if (power_well->always_on || !i915.disable_power_well) {
-               if (!enabled)
-                       goto mismatch;
+       /* enable HiZ Raw Stall Optimization */
+       I915_WRITE(CACHE_MODE_0_GEN7,
+                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
 
-               return;
-       }
+       /* WaDisable4x2SubspanOptimization:hsw */
+       I915_WRITE(CACHE_MODE_1,
+                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
+
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
 
-       if (enabled != (power_well->count > 0))
-               goto mismatch;
+       /* WaSwitchSolVfFArbitrationPriority:hsw */
+       I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
-       return;
+       /* WaRsPkgCStateDisplayPMReq:hsw */
+       I915_WRITE(CHICKEN_PAR1_1,
+                  I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES);
 
-mismatch:
-       WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n",
-                 power_well->name, power_well->always_on, enabled,
-                 power_well->count, i915.disable_power_well);
+       lpt_init_clock_gating(dev);
 }
 
-void intel_display_power_get(struct drm_i915_private *dev_priv,
-                            enum intel_display_power_domain domain)
+static void ivybridge_init_clock_gating(struct drm_device *dev)
 {
-       struct i915_power_domains *power_domains;
-       struct i915_power_well *power_well;
-       int i;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t snpcr;
 
-       intel_runtime_pm_get(dev_priv);
+       ilk_init_lp_watermarks(dev);
 
-       power_domains = &dev_priv->power_domains;
+       I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
 
-       mutex_lock(&power_domains->lock);
+       /* WaDisableEarlyCull:ivb */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
 
-       for_each_power_well(i, power_well, BIT(domain), power_domains) {
-               if (!power_well->count++) {
-                       DRM_DEBUG_KMS("enabling %s\n", power_well->name);
-                       power_well->ops->enable(dev_priv, power_well);
-                       power_well->hw_enabled = true;
-               }
+       /* WaDisableBackToBackFlipFix:ivb */
+       I915_WRITE(IVB_CHICKEN3,
+                  CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
+                  CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
-               check_power_well_state(dev_priv, power_well);
-       }
+       /* WaDisablePSDDualDispatchEnable:ivb */
+       if (IS_IVB_GT1(dev))
+               I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+                          _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
-       power_domains->domain_use_count[domain]++;
+       /* WaDisable_RenderCache_OperationalFlush:ivb */
+       I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-       mutex_unlock(&power_domains->lock);
-}
+       /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
+       I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
+                  GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
 
-void intel_display_power_put(struct drm_i915_private *dev_priv,
-                            enum intel_display_power_domain domain)
-{
-       struct i915_power_domains *power_domains;
-       struct i915_power_well *power_well;
-       int i;
+       /* WaApplyL3ControlAndL3ChickenMode:ivb */
+       I915_WRITE(GEN7_L3CNTLREG1,
+                       GEN7_WA_FOR_GEN7_L3_CONTROL);
+       I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER,
+                  GEN7_WA_L3_CHICKEN_MODE);
+       if (IS_IVB_GT1(dev))
+               I915_WRITE(GEN7_ROW_CHICKEN2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       else {
+               /* must write both registers */
+               I915_WRITE(GEN7_ROW_CHICKEN2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+               I915_WRITE(GEN7_ROW_CHICKEN2_GT2,
+                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       }
 
-       power_domains = &dev_priv->power_domains;
+       /* WaForceL3Serialization:ivb */
+       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
 
-       mutex_lock(&power_domains->lock);
+       /*
+        * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
+        * This implements the WaDisableRCZUnitClockGating:ivb workaround.
+        */
+       I915_WRITE(GEN6_UCGCTL2,
+                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
-       WARN_ON(!power_domains->domain_use_count[domain]);
-       power_domains->domain_use_count[domain]--;
+       /* This is required by WaCatErrorRejectionIssue:ivb */
+       I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+                       I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+                       GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
-       for_each_power_well_rev(i, power_well, BIT(domain), power_domains) {
-               WARN_ON(!power_well->count);
+       g4x_disable_trickle_feed(dev);
 
-               if (!--power_well->count && i915.disable_power_well) {
-                       DRM_DEBUG_KMS("disabling %s\n", power_well->name);
-                       power_well->hw_enabled = false;
-                       power_well->ops->disable(dev_priv, power_well);
-               }
+       gen7_setup_fixed_func_scheduler(dev_priv);
 
-               check_power_well_state(dev_priv, power_well);
+       if (0) { /* causes HiZ corruption on ivb:gt1 */
+               /* enable HiZ Raw Stall Optimization */
+               I915_WRITE(CACHE_MODE_0_GEN7,
+                          _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
        }
 
-       mutex_unlock(&power_domains->lock);
-
-       intel_runtime_pm_put(dev_priv);
-}
-
-static struct i915_power_domains *hsw_pwr;
-
-/* Display audio driver power well request */
-int i915_request_power_well(void)
-{
-       struct drm_i915_private *dev_priv;
-
-       if (!hsw_pwr)
-               return -ENODEV;
+       /* WaDisable4x2SubspanOptimization:ivb */
+       I915_WRITE(CACHE_MODE_1,
+                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
-       dev_priv = container_of(hsw_pwr, struct drm_i915_private,
-                               power_domains);
-       intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(i915_request_power_well);
+       /*
+        * BSpec recommends 8x4 when MSAA is used,
+        * however in practice 16x4 seems fastest.
+        *
+        * Note that PS/WM thread counts depend on the WIZ hashing
+        * disable bit, which we don't touch here, but it's good
+        * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+        */
+       I915_WRITE(GEN7_GT_MODE,
+                  _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4));
 
-/* Display audio driver power well release */
-int i915_release_power_well(void)
-{
-       struct drm_i915_private *dev_priv;
+       snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
+       snpcr &= ~GEN6_MBC_SNPCR_MASK;
+       snpcr |= GEN6_MBC_SNPCR_MED;
+       I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr);
 
-       if (!hsw_pwr)
-               return -ENODEV;
+       if (!HAS_PCH_NOP(dev))
+               cpt_init_clock_gating(dev);
 
-       dev_priv = container_of(hsw_pwr, struct drm_i915_private,
-                               power_domains);
-       intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
-       return 0;
+       gen6_check_mch_setup(dev);
 }
-EXPORT_SYMBOL_GPL(i915_release_power_well);
 
-/*
- * Private interface for the audio driver to get CDCLK in kHz.
- *
- * Caller must request power well using i915_request_power_well() prior to
- * making the call.
- */
-int i915_get_cdclk_freq(void)
+static void valleyview_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_i915_private *dev_priv;
-
-       if (!hsw_pwr)
-               return -ENODEV;
-
-       dev_priv = container_of(hsw_pwr, struct drm_i915_private,
-                               power_domains);
-
-       return intel_ddi_get_cdclk_freq(dev_priv);
-}
-EXPORT_SYMBOL_GPL(i915_get_cdclk_freq);
-
-
-#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1)
-
-#define HSW_ALWAYS_ON_POWER_DOMAINS (                  \
-       BIT(POWER_DOMAIN_PIPE_A) |                      \
-       BIT(POWER_DOMAIN_TRANSCODER_EDP) |              \
-       BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |          \
-       BIT(POWER_DOMAIN_PORT_CRT) |                    \
-       BIT(POWER_DOMAIN_PLLS) |                        \
-       BIT(POWER_DOMAIN_INIT))
-#define HSW_DISPLAY_POWER_DOMAINS (                            \
-       (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) |    \
-       BIT(POWER_DOMAIN_INIT))
-
-#define BDW_ALWAYS_ON_POWER_DOMAINS (                  \
-       HSW_ALWAYS_ON_POWER_DOMAINS |                   \
-       BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER))
-#define BDW_DISPLAY_POWER_DOMAINS (                            \
-       (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) |    \
-       BIT(POWER_DOMAIN_INIT))
-
-#define VLV_ALWAYS_ON_POWER_DOMAINS    BIT(POWER_DOMAIN_INIT)
-#define VLV_DISPLAY_POWER_DOMAINS      POWER_DOMAIN_MASK
-
-#define VLV_DPIO_CMN_BC_POWER_DOMAINS (                \
-       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_CRT) |            \
-       BIT(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_A_POWER_DOMAINS (     \
-       BIT(POWER_DOMAIN_PIPE_A) |      \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_B_POWER_DOMAINS (     \
-       BIT(POWER_DOMAIN_PIPE_B) |      \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_C_POWER_DOMAINS (     \
-       BIT(POWER_DOMAIN_PIPE_C) |      \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_CMN_BC_POWER_DOMAINS (                \
-       BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_CMN_D_POWER_DOMAINS (         \
-       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |  \
-       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \
-       BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |  \
-       BIT(POWER_DOMAIN_INIT))
-
-static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
-       .sync_hw = i9xx_always_on_power_well_noop,
-       .enable = i9xx_always_on_power_well_noop,
-       .disable = i9xx_always_on_power_well_noop,
-       .is_enabled = i9xx_always_on_power_well_enabled,
-};
-
-static const struct i915_power_well_ops chv_pipe_power_well_ops = {
-       .sync_hw = chv_pipe_power_well_sync_hw,
-       .enable = chv_pipe_power_well_enable,
-       .disable = chv_pipe_power_well_disable,
-       .is_enabled = chv_pipe_power_well_enabled,
-};
-
-static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = {
-       .sync_hw = vlv_power_well_sync_hw,
-       .enable = chv_dpio_cmn_power_well_enable,
-       .disable = chv_dpio_cmn_power_well_disable,
-       .is_enabled = vlv_power_well_enabled,
-};
-
-static struct i915_power_well i9xx_always_on_power_well[] = {
-       {
-               .name = "always-on",
-               .always_on = 1,
-               .domains = POWER_DOMAIN_MASK,
-               .ops = &i9xx_always_on_power_well_ops,
-       },
-};
-
-static const struct i915_power_well_ops hsw_power_well_ops = {
-       .sync_hw = hsw_power_well_sync_hw,
-       .enable = hsw_power_well_enable,
-       .disable = hsw_power_well_disable,
-       .is_enabled = hsw_power_well_enabled,
-};
-
-static struct i915_power_well hsw_power_wells[] = {
-       {
-               .name = "always-on",
-               .always_on = 1,
-               .domains = HSW_ALWAYS_ON_POWER_DOMAINS,
-               .ops = &i9xx_always_on_power_well_ops,
-       },
-       {
-               .name = "display",
-               .domains = HSW_DISPLAY_POWER_DOMAINS,
-               .ops = &hsw_power_well_ops,
-       },
-};
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-static struct i915_power_well bdw_power_wells[] = {
-       {
-               .name = "always-on",
-               .always_on = 1,
-               .domains = BDW_ALWAYS_ON_POWER_DOMAINS,
-               .ops = &i9xx_always_on_power_well_ops,
-       },
-       {
-               .name = "display",
-               .domains = BDW_DISPLAY_POWER_DOMAINS,
-               .ops = &hsw_power_well_ops,
-       },
-};
+       I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
 
-static const struct i915_power_well_ops vlv_display_power_well_ops = {
-       .sync_hw = vlv_power_well_sync_hw,
-       .enable = vlv_display_power_well_enable,
-       .disable = vlv_display_power_well_disable,
-       .is_enabled = vlv_power_well_enabled,
-};
+       /* WaDisableEarlyCull:vlv */
+       I915_WRITE(_3D_CHICKEN3,
+                  _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
 
-static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = {
-       .sync_hw = vlv_power_well_sync_hw,
-       .enable = vlv_dpio_cmn_power_well_enable,
-       .disable = vlv_dpio_cmn_power_well_disable,
-       .is_enabled = vlv_power_well_enabled,
-};
+       /* WaDisableBackToBackFlipFix:vlv */
+       I915_WRITE(IVB_CHICKEN3,
+                  CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE |
+                  CHICKEN3_DGMG_DONE_FIX_DISABLE);
 
-static const struct i915_power_well_ops vlv_dpio_power_well_ops = {
-       .sync_hw = vlv_power_well_sync_hw,
-       .enable = vlv_power_well_enable,
-       .disable = vlv_power_well_disable,
-       .is_enabled = vlv_power_well_enabled,
-};
+       /* WaPsdDispatchEnable:vlv */
+       /* WaDisablePSDDualDispatchEnable:vlv */
+       I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
+                  _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
+                                     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
-static struct i915_power_well vlv_power_wells[] = {
-       {
-               .name = "always-on",
-               .always_on = 1,
-               .domains = VLV_ALWAYS_ON_POWER_DOMAINS,
-               .ops = &i9xx_always_on_power_well_ops,
-       },
-       {
-               .name = "display",
-               .domains = VLV_DISPLAY_POWER_DOMAINS,
-               .data = PUNIT_POWER_WELL_DISP2D,
-               .ops = &vlv_display_power_well_ops,
-       },
-       {
-               .name = "dpio-tx-b-01",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
-       },
-       {
-               .name = "dpio-tx-b-23",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
-       },
-       {
-               .name = "dpio-tx-c-01",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
-       },
-       {
-               .name = "dpio-tx-c-23",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
-       },
-       {
-               .name = "dpio-common",
-               .domains = VLV_DPIO_CMN_BC_POWER_DOMAINS,
-               .data = PUNIT_POWER_WELL_DPIO_CMN_BC,
-               .ops = &vlv_dpio_cmn_power_well_ops,
-       },
-};
+       /* WaDisable_RenderCache_OperationalFlush:vlv */
+       I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-static struct i915_power_well chv_power_wells[] = {
-       {
-               .name = "always-on",
-               .always_on = 1,
-               .domains = VLV_ALWAYS_ON_POWER_DOMAINS,
-               .ops = &i9xx_always_on_power_well_ops,
-       },
-#if 0
-       {
-               .name = "display",
-               .domains = VLV_DISPLAY_POWER_DOMAINS,
-               .data = PUNIT_POWER_WELL_DISP2D,
-               .ops = &vlv_display_power_well_ops,
-       },
-       {
-               .name = "pipe-a",
-               .domains = CHV_PIPE_A_POWER_DOMAINS,
-               .data = PIPE_A,
-               .ops = &chv_pipe_power_well_ops,
-       },
-       {
-               .name = "pipe-b",
-               .domains = CHV_PIPE_B_POWER_DOMAINS,
-               .data = PIPE_B,
-               .ops = &chv_pipe_power_well_ops,
-       },
-       {
-               .name = "pipe-c",
-               .domains = CHV_PIPE_C_POWER_DOMAINS,
-               .data = PIPE_C,
-               .ops = &chv_pipe_power_well_ops,
-       },
-#endif
-       {
-               .name = "dpio-common-bc",
-               /*
-                * XXX: cmnreset for one PHY seems to disturb the other.
-                * As a workaround keep both powered on at the same
-                * time for now.
-                */
-               .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS,
-               .data = PUNIT_POWER_WELL_DPIO_CMN_BC,
-               .ops = &chv_dpio_cmn_power_well_ops,
-       },
-       {
-               .name = "dpio-common-d",
-               /*
-                * XXX: cmnreset for one PHY seems to disturb the other.
-                * As a workaround keep both powered on at the same
-                * time for now.
-                */
-               .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS,
-               .data = PUNIT_POWER_WELL_DPIO_CMN_D,
-               .ops = &chv_dpio_cmn_power_well_ops,
-       },
-#if 0
-       {
-               .name = "dpio-tx-b-01",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
-       },
-       {
-               .name = "dpio-tx-b-23",
-               .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
-       },
-       {
-               .name = "dpio-tx-c-01",
-               .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
-       },
-       {
-               .name = "dpio-tx-c-23",
-               .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-                          VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
-       },
-       {
-               .name = "dpio-tx-d-01",
-               .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
-                          CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01,
-       },
-       {
-               .name = "dpio-tx-d-23",
-               .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
-                          CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
-               .ops = &vlv_dpio_power_well_ops,
-               .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23,
-       },
-#endif
-};
+       /* WaForceL3Serialization:vlv */
+       I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
+                  ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
 
-static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv,
-                                                enum punit_power_well power_well_id)
-{
-       struct i915_power_domains *power_domains = &dev_priv->power_domains;
-       struct i915_power_well *power_well;
-       int i;
+       /* WaDisableDopClockGating:vlv */
+       I915_WRITE(GEN7_ROW_CHICKEN2,
+                  _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 
-       for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
-               if (power_well->data == power_well_id)
-                       return power_well;
-       }
+       /* This is required by WaCatErrorRejectionIssue:vlv */
+       I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+                  I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
+                  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
-       return NULL;
-}
+       gen7_setup_fixed_func_scheduler(dev_priv);
 
-#define set_power_wells(power_domains, __power_wells) ({               \
-       (power_domains)->power_wells = (__power_wells);                 \
-       (power_domains)->power_well_count = ARRAY_SIZE(__power_wells);  \
-})
+       /*
+        * According to the spec, bit 13 (RCZUNIT) must be set on IVB.
+        * This implements the WaDisableRCZUnitClockGating:vlv workaround.
+        */
+       I915_WRITE(GEN6_UCGCTL2,
+                  GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
 
-int intel_power_domains_init(struct drm_i915_private *dev_priv)
-{
-       struct i915_power_domains *power_domains = &dev_priv->power_domains;
+       /* WaDisableL3Bank2xClockGate:vlv
+        * Disabling L3 clock gating- MMIO 940c[25] = 1
+        * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
+       I915_WRITE(GEN7_UCGCTL4,
+                  I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
 
-       mutex_init(&power_domains->lock);
+       I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
        /*
-        * The enabling order will be from lower to higher indexed wells,
-        * the disabling order is reversed.
+        * BSpec says this must be set, even though
+        * WaDisable4x2SubspanOptimization isn't listed for VLV.
         */
-       if (IS_HASWELL(dev_priv->dev)) {
-               set_power_wells(power_domains, hsw_power_wells);
-               hsw_pwr = power_domains;
-       } else if (IS_BROADWELL(dev_priv->dev)) {
-               set_power_wells(power_domains, bdw_power_wells);
-               hsw_pwr = power_domains;
-       } else if (IS_CHERRYVIEW(dev_priv->dev)) {
-               set_power_wells(power_domains, chv_power_wells);
-       } else if (IS_VALLEYVIEW(dev_priv->dev)) {
-               set_power_wells(power_domains, vlv_power_wells);
-       } else {
-               set_power_wells(power_domains, i9xx_always_on_power_well);
-       }
+       I915_WRITE(CACHE_MODE_1,
+                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
-       return 0;
-}
+       /*
+        * WaIncreaseL3CreditsForVLVB0:vlv
+        * This is the hardware default actually.
+        */
+       I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
 
-void intel_power_domains_remove(struct drm_i915_private *dev_priv)
-{
-       hsw_pwr = NULL;
+       /*
+        * WaDisableVLVClockGating_VBIIssue:vlv
+        * Disable clock gating on the GCFG unit to prevent a delay
+        * in the reporting of vblank events.
+        */
+       I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS);
 }
 
-static void intel_power_domains_resume(struct drm_i915_private *dev_priv)
+static void cherryview_init_clock_gating(struct drm_device *dev)
 {
-       struct i915_power_domains *power_domains = &dev_priv->power_domains;
-       struct i915_power_well *power_well;
-       int i;
-
-       mutex_lock(&power_domains->lock);
-       for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) {
-               power_well->ops->sync_hw(dev_priv, power_well);
-               power_well->hw_enabled = power_well->ops->is_enabled(dev_priv,
-                                                                    power_well);
-       }
-       mutex_unlock(&power_domains->lock);
-}
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv)
-{
-       struct i915_power_well *cmn =
-               lookup_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_BC);
-       struct i915_power_well *disp2d =
-               lookup_power_well(dev_priv, PUNIT_POWER_WELL_DISP2D);
+       I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE);
 
-       /* nothing to do if common lane is already off */
-       if (!cmn->ops->is_enabled(dev_priv, cmn))
-               return;
+       I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
-       /* If the display might be already active skip this */
-       if (disp2d->ops->is_enabled(dev_priv, disp2d) &&
-           I915_READ(DPIO_CTL) & DPIO_CMNRST)
-               return;
+       /* WaVSRefCountFullforceMissDisable:chv */
+       /* WaDSRefCountFullforceMissDisable:chv */
+       I915_WRITE(GEN7_FF_THREAD_MODE,
+                  I915_READ(GEN7_FF_THREAD_MODE) &
+                  ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
 
-       DRM_DEBUG_KMS("toggling display PHY side reset\n");
+       /* WaDisableSemaphoreAndSyncFlipWait:chv */
+       I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
+                  _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
 
-       /* cmnlane needs DPLL registers */
-       disp2d->ops->enable(dev_priv, disp2d);
+       /* WaDisableCSUnitClockGating:chv */
+       I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
+                  GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-       /*
-        * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx:
-        * Need to assert and de-assert PHY SB reset by gating the
-        * common lane power, then un-gating it.
-        * Simply ungating isn't enough to reset the PHY enough to get
-        * ports and lanes running.
-        */
-       cmn->ops->disable(dev_priv, cmn);
+       /* WaDisableSDEUnitClockGating:chv */
+       I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+                  GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 }
 
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv)
+static void g4x_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
-       struct i915_power_domains *power_domains = &dev_priv->power_domains;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dspclk_gate;
 
-       power_domains->initializing = true;
+       I915_WRITE(RENCLK_GATE_D1, 0);
+       I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
+                  GS_UNIT_CLOCK_GATE_DISABLE |
+                  CL_UNIT_CLOCK_GATE_DISABLE);
+       I915_WRITE(RAMCLK_GATE_D, 0);
+       dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
+               OVRUNIT_CLOCK_GATE_DISABLE |
+               OVCUNIT_CLOCK_GATE_DISABLE;
+       if (IS_GM45(dev))
+               dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
+       I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
 
-       if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) {
-               mutex_lock(&power_domains->lock);
-               vlv_cmnlane_wa(dev_priv);
-               mutex_unlock(&power_domains->lock);
-       }
+       /* WaDisableRenderCachePipelinedFlush */
+       I915_WRITE(CACHE_MODE_0,
+                  _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
-       /* For now, we need the power well to be always enabled. */
-       intel_display_set_init_power(dev_priv, true);
-       intel_power_domains_resume(dev_priv);
-       power_domains->initializing = false;
-}
+       /* WaDisable_RenderCache_OperationalFlush:g4x */
+       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 
-void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
-{
-       intel_runtime_pm_get(dev_priv);
+       g4x_disable_trickle_feed(dev);
 }
 
-void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
+static void crestline_init_clock_gating(struct drm_device *dev)
 {
-       intel_runtime_pm_put(dev_priv);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
+       I915_WRITE(RENCLK_GATE_D2, 0);
+       I915_WRITE(DSPCLK_GATE_D, 0);
+       I915_WRITE(RAMCLK_GATE_D, 0);
+       I915_WRITE16(DEUC, 0);
+       I915_WRITE(MI_ARB_STATE,
+                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+
+       /* WaDisable_RenderCache_OperationalFlush:gen4 */
+       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
-void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
+static void broadwater_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
-       struct device *device = &dev->pdev->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (!HAS_RUNTIME_PM(dev))
-               return;
+       I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
+                  I965_RCC_CLOCK_GATE_DISABLE |
+                  I965_RCPB_CLOCK_GATE_DISABLE |
+                  I965_ISC_CLOCK_GATE_DISABLE |
+                  I965_FBC_CLOCK_GATE_DISABLE);
+       I915_WRITE(RENCLK_GATE_D2, 0);
+       I915_WRITE(MI_ARB_STATE,
+                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
 
-       pm_runtime_get_sync(device);
-       WARN(dev_priv->pm.suspended, "Device still suspended.\n");
+       /* WaDisable_RenderCache_OperationalFlush:gen4 */
+       I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
-void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
+static void gen3_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
-       struct device *device = &dev->pdev->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 dstate = I915_READ(D_STATE);
 
-       if (!HAS_RUNTIME_PM(dev))
-               return;
+       dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
+               DSTATE_DOT_CLOCK_GATING;
+       I915_WRITE(D_STATE, dstate);
 
-       WARN(dev_priv->pm.suspended, "Getting nosync-ref while suspended.\n");
-       pm_runtime_get_noresume(device);
-}
+       if (IS_PINEVIEW(dev))
+               I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY));
 
-void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
-{
-       struct drm_device *dev = dev_priv->dev;
-       struct device *device = &dev->pdev->dev;
+       /* IIR "flip pending" means done if this bit is set */
+       I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE));
 
-       if (!HAS_RUNTIME_PM(dev))
-               return;
+       /* interrupts should cause a wake up from C3 */
+       I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN));
 
-       pm_runtime_mark_last_busy(device);
-       pm_runtime_put_autosuspend(device);
+       /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
+       I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
+
+       I915_WRITE(MI_ARB_STATE,
+                  _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
 }
 
-void intel_init_runtime_pm(struct drm_i915_private *dev_priv)
+static void i85x_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
-       struct device *device = &dev->pdev->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (!HAS_RUNTIME_PM(dev))
-               return;
+       I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
+
+       /* interrupts should cause a wake up from C3 */
+       I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) |
+                  _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE));
 
-       pm_runtime_set_active(device);
+       I915_WRITE(MEM_MODE,
+                  _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE));
+}
 
-       /*
-        * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
-        * requirement.
-        */
-       if (!intel_enable_rc6(dev)) {
-               DRM_INFO("RC6 disabled, disabling runtime PM support\n");
-               return;
-       }
+static void i830_init_clock_gating(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */
-       pm_runtime_mark_last_busy(device);
-       pm_runtime_use_autosuspend(device);
+       I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
 
-       pm_runtime_put_autosuspend(device);
+       I915_WRITE(MEM_MODE,
+                  _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) |
+                  _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE));
 }
 
-void intel_fini_runtime_pm(struct drm_i915_private *dev_priv)
+void intel_init_clock_gating(struct drm_device *dev)
 {
-       struct drm_device *dev = dev_priv->dev;
-       struct device *device = &dev->pdev->dev;
-
-       if (!HAS_RUNTIME_PM(dev))
-               return;
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (!intel_enable_rc6(dev))
-               return;
+       dev_priv->display.init_clock_gating(dev);
+}
 
-       /* Make sure we're not suspended first. */
-       pm_runtime_get_sync(device);
-       pm_runtime_disable(device);
+void intel_suspend_hw(struct drm_device *dev)
+{
+       if (HAS_PCH_LPT(dev))
+               lpt_suspend_hw(dev);
 }
 
 /* Set up chip specific power management-related functions */
@@ -7198,28 +6320,7 @@ void intel_init_pm(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
-       if (HAS_FBC(dev)) {
-               if (INTEL_INFO(dev)->gen >= 7) {
-                       dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
-                       dev_priv->display.enable_fbc = gen7_enable_fbc;
-                       dev_priv->display.disable_fbc = ironlake_disable_fbc;
-               } else if (INTEL_INFO(dev)->gen >= 5) {
-                       dev_priv->display.fbc_enabled = ironlake_fbc_enabled;
-                       dev_priv->display.enable_fbc = ironlake_enable_fbc;
-                       dev_priv->display.disable_fbc = ironlake_disable_fbc;
-               } else if (IS_GM45(dev)) {
-                       dev_priv->display.fbc_enabled = g4x_fbc_enabled;
-                       dev_priv->display.enable_fbc = g4x_enable_fbc;
-                       dev_priv->display.disable_fbc = g4x_disable_fbc;
-               } else {
-                       dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
-                       dev_priv->display.enable_fbc = i8xx_enable_fbc;
-                       dev_priv->display.disable_fbc = i8xx_disable_fbc;
-
-                       /* This value was pulled out of someone's hat */
-                       I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
-               }
-       }
+       intel_fbc_init(dev_priv);
 
        /* For cxsr */
        if (IS_PINEVIEW(dev))
@@ -7228,7 +6329,13 @@ void intel_init_pm(struct drm_device *dev)
                i915_ironlake_get_mem_freq(dev);
 
        /* For FIFO watermark updates */
-       if (HAS_PCH_SPLIT(dev)) {
+       if (INTEL_INFO(dev)->gen >= 9) {
+               skl_setup_wm_latency(dev);
+
+               dev_priv->display.init_clock_gating = gen9_init_clock_gating;
+               dev_priv->display.update_wm = skl_update_wm;
+               dev_priv->display.update_sprite_wm = skl_update_sprite_wm;
+       } else if (HAS_PCH_SPLIT(dev)) {
                ilk_setup_wm_latency(dev);
 
                if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] &&
@@ -7309,7 +6416,7 @@ void intel_init_pm(struct drm_device *dev)
        }
 }
 
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
+int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
@@ -7319,6 +6426,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
        }
 
        I915_WRITE(GEN6_PCODE_DATA, *val);
+       I915_WRITE(GEN6_PCODE_DATA1, 0);
        I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
 
        if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0,
@@ -7333,7 +6441,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
        return 0;
 }
 
-int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
+int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val)
 {
        WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
@@ -7356,99 +6464,66 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val)
        return 0;
 }
 
-static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
+static int vlv_gpu_freq_div(unsigned int czclk_freq)
 {
-       int div;
-
-       /* 4 x czclk */
-       switch (dev_priv->mem_freq) {
-       case 800:
-               div = 10;
-               break;
-       case 1066:
-               div = 12;
-               break;
-       case 1333:
-               div = 16;
-               break;
+       switch (czclk_freq) {
+       case 200:
+               return 10;
+       case 267:
+               return 12;
+       case 320:
+       case 333:
+               return 16;
+       case 400:
+               return 20;
        default:
                return -1;
        }
+}
+
+static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
+{
+       int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
 
-       return DIV_ROUND_CLOSEST(dev_priv->mem_freq * (val + 6 - 0xbd), 4 * div);
+       div = vlv_gpu_freq_div(czclk_freq);
+       if (div < 0)
+               return div;
+
+       return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div);
 }
 
 static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
-       int mul;
+       int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4);
 
-       /* 4 x czclk */
-       switch (dev_priv->mem_freq) {
-       case 800:
-               mul = 10;
-               break;
-       case 1066:
-               mul = 12;
-               break;
-       case 1333:
-               mul = 16;
-               break;
-       default:
-               return -1;
-       }
+       mul = vlv_gpu_freq_div(czclk_freq);
+       if (mul < 0)
+               return mul;
 
-       return DIV_ROUND_CLOSEST(4 * mul * val, dev_priv->mem_freq) + 0xbd - 6;
+       return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6;
 }
 
 static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
-       int div, freq;
-
-       switch (dev_priv->rps.cz_freq) {
-       case 200:
-               div = 5;
-               break;
-       case 267:
-               div = 6;
-               break;
-       case 320:
-       case 333:
-       case 400:
-               div = 8;
-               break;
-       default:
-               return -1;
-       }
+       int div, czclk_freq = dev_priv->rps.cz_freq;
 
-       freq = (DIV_ROUND_CLOSEST((dev_priv->rps.cz_freq * val), 2 * div) / 2);
+       div = vlv_gpu_freq_div(czclk_freq) / 2;
+       if (div < 0)
+               return div;
 
-       return freq;
+       return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2;
 }
 
 static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val)
 {
-       int mul, opcode;
+       int mul, czclk_freq = dev_priv->rps.cz_freq;
 
-       switch (dev_priv->rps.cz_freq) {
-       case 200:
-               mul = 5;
-               break;
-       case 267:
-               mul = 6;
-               break;
-       case 320:
-       case 333:
-       case 400:
-               mul = 8;
-               break;
-       default:
-               return -1;
-       }
+       mul = vlv_gpu_freq_div(czclk_freq) / 2;
+       if (mul < 0)
+               return mul;
 
        /* CHV needs even values */
-       opcode = (DIV_ROUND_CLOSEST((val * 2 * mul), dev_priv->rps.cz_freq) * 2);
-
-       return opcode;
+       return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2;
 }
 
 int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val)
@@ -7485,5 +6560,4 @@ void intel_pm_setup(struct drm_device *dev)
                          intel_gen6_powersave_work);
 
        dev_priv->pm.suspended = false;
-       dev_priv->pm._irqs_disabled = false;
 }