X-Git-Url: http://git.cascardo.info/?p=cascardo%2Flinux.git;a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fi915%2Fintel_pm.c;h=a3ebaa87310708138ba4565ee9fa6332c6a0b81b;hp=ad2fd605f76be43c847807996a0db06b8b8dbc05;hb=adc31849b27fefeca6c225d3895143a2ec6970fa;hpb=e61745769125a3c788e7aceb2a5fb680ebb461e6 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ad2fd605f76b..a3ebaa873107 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -30,9 +30,6 @@ #include "intel_drv.h" #include "../../../platform/x86/intel_ips.h" #include -#include -#include -#include /** * RC6 is a special power stage which allows the GPU to enter an very @@ -55,611 +52,31 @@ #define INTEL_RC6p_ENABLE (1<<1) #define INTEL_RC6pp_ENABLE (1<<2) -/* FBC, or Frame Buffer Compression, is a technique employed to compress the - * framebuffer contents in-memory, aiming at reducing the required bandwidth - * during in-memory transfers and, therefore, reduce the power packet. - * - * The benefits of FBC are mostly visible with solid backgrounds and - * variation-less patterns. - * - * FBC-related functionality can be enabled by the means of the - * i915.i915_enable_fbc parameter - */ - -static void i8xx_disable_fbc(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 fbc_ctl; - - /* Disable compression */ - fbc_ctl = I915_READ(FBC_CONTROL); - if ((fbc_ctl & FBC_CTL_EN) == 0) - return; - - fbc_ctl &= ~FBC_CTL_EN; - I915_WRITE(FBC_CONTROL, fbc_ctl); - - /* Wait for compressing bit to clear */ - if (wait_for((I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING) == 0, 10)) { - DRM_DEBUG_KMS("FBC idle timed out\n"); - return; - } - - DRM_DEBUG_KMS("disabled FBC\n"); -} - -static void i8xx_enable_fbc(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_framebuffer *fb = crtc->primary->fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int cfb_pitch; - int i; - u32 fbc_ctl; - - cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE; - if (fb->pitches[0] < cfb_pitch) - cfb_pitch = fb->pitches[0]; - - /* FBC_CTL wants 32B or 64B units */ - if (IS_GEN2(dev)) - cfb_pitch = (cfb_pitch / 32) - 1; - else - cfb_pitch = (cfb_pitch / 64) - 1; - - /* Clear old tags */ - for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++) - I915_WRITE(FBC_TAG + (i * 4), 0); - - if (IS_GEN4(dev)) { - u32 fbc_ctl2; - - /* Set it up... */ - fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | FBC_CTL_CPU_FENCE; - fbc_ctl2 |= FBC_CTL_PLANE(intel_crtc->plane); - I915_WRITE(FBC_CONTROL2, fbc_ctl2); - I915_WRITE(FBC_FENCE_OFF, crtc->y); - } - - /* enable it... 
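	 * (a sketch of what the single register write below does: the old
	 * compression interval field is kept, while the enable bit, periodic
	 * mode, the CFB stride in 32B/64B units and the fence register
	 * number are all packed fresh into FBC_CONTROL)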
*/ - fbc_ctl = I915_READ(FBC_CONTROL); - fbc_ctl &= 0x3fff << FBC_CTL_INTERVAL_SHIFT; - fbc_ctl |= FBC_CTL_EN | FBC_CTL_PERIODIC; - if (IS_I945GM(dev)) - fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */ - fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; - fbc_ctl |= obj->fence_reg; - I915_WRITE(FBC_CONTROL, fbc_ctl); - - DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n", - cfb_pitch, crtc->y, plane_name(intel_crtc->plane)); -} - -static bool i8xx_fbc_enabled(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - return I915_READ(FBC_CONTROL) & FBC_CTL_EN; -} - -static void g4x_enable_fbc(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_framebuffer *fb = crtc->primary->fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 dpfc_ctl; - - dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane) | DPFC_SR_EN; - if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - else - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - dpfc_ctl |= DPFC_CTL_FENCE_EN | obj->fence_reg; - - I915_WRITE(DPFC_FENCE_YOFF, crtc->y); - - /* enable it... */ - I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - - DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane)); -} - -static void g4x_disable_fbc(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 dpfc_ctl; - - /* Disable compression */ - dpfc_ctl = I915_READ(DPFC_CONTROL); - if (dpfc_ctl & DPFC_CTL_EN) { - dpfc_ctl &= ~DPFC_CTL_EN; - I915_WRITE(DPFC_CONTROL, dpfc_ctl); - - DRM_DEBUG_KMS("disabled FBC\n"); - } -} - -static bool g4x_fbc_enabled(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN; -} - -static void sandybridge_blit_fbc_update(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 blt_ecoskpd; - - /* Make sure blitter notifies FBC of writes */ - - /* Blitter is part of Media powerwell on VLV. 
No impact of - * his param in other platforms for now */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA); - - blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD); - blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY << - GEN6_BLITTER_LOCK_SHIFT; - I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); - blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY; - I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); - blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY << - GEN6_BLITTER_LOCK_SHIFT); - I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd); - POSTING_READ(GEN6_BLITTER_ECOSKPD); - - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA); -} - -static void ironlake_enable_fbc(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_framebuffer *fb = crtc->primary->fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 dpfc_ctl; - - dpfc_ctl = DPFC_CTL_PLANE(intel_crtc->plane); - if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) - dev_priv->fbc.threshold++; - - switch (dev_priv->fbc.threshold) { - case 4: - case 3: - dpfc_ctl |= DPFC_CTL_LIMIT_4X; - break; - case 2: - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - break; - case 1: - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - break; - } - dpfc_ctl |= DPFC_CTL_FENCE_EN; - if (IS_GEN5(dev)) - dpfc_ctl |= obj->fence_reg; - - I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y); - I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID); - /* enable it... */ - I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - - if (IS_GEN6(dev)) { - I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | obj->fence_reg); - I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y); - sandybridge_blit_fbc_update(dev); - } - - DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane)); -} - -static void ironlake_disable_fbc(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 dpfc_ctl; - - /* Disable compression */ - dpfc_ctl = I915_READ(ILK_DPFC_CONTROL); - if (dpfc_ctl & DPFC_CTL_EN) { - dpfc_ctl &= ~DPFC_CTL_EN; - I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl); - - DRM_DEBUG_KMS("disabled FBC\n"); - } -} - -static bool ironlake_fbc_enabled(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN; -} - -static void gen7_enable_fbc(struct drm_crtc *crtc) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_framebuffer *fb = crtc->primary->fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 dpfc_ctl; - - dpfc_ctl = IVB_DPFC_CTL_PLANE(intel_crtc->plane); - if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) - dev_priv->fbc.threshold++; - - switch (dev_priv->fbc.threshold) { - case 4: - case 3: - dpfc_ctl |= DPFC_CTL_LIMIT_4X; - break; - case 2: - dpfc_ctl |= DPFC_CTL_LIMIT_2X; - break; - case 1: - dpfc_ctl |= DPFC_CTL_LIMIT_1X; - break; - } - - dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; - - if (dev_priv->fbc.false_color) - dpfc_ctl |= FBC_CTL_FALSE_COLOR; - - I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - - if (IS_IVYBRIDGE(dev)) { - /* WaFbcAsynchFlipDisableFbcQueue:ivb */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS); - } else { - /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */ - I915_WRITE(CHICKEN_PIPESL_1(intel_crtc->pipe), - I915_READ(CHICKEN_PIPESL_1(intel_crtc->pipe)) | - HSW_FBCQ_DIS); - } - - I915_WRITE(SNB_DPFC_CTL_SA, - 
SNB_CPU_FENCE_ENABLE | obj->fence_reg); - I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y); - - sandybridge_blit_fbc_update(dev); - - DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane)); -} - -bool intel_fbc_enabled(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!dev_priv->display.fbc_enabled) - return false; - - return dev_priv->display.fbc_enabled(dev); -} - -void gen8_fbc_sw_flush(struct drm_device *dev, u32 value) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!IS_GEN8(dev)) - return; - - I915_WRITE(MSG_FBC_REND_STATE, value); -} - -static void intel_fbc_work_fn(struct work_struct *__work) -{ - struct intel_fbc_work *work = - container_of(to_delayed_work(__work), - struct intel_fbc_work, work); - struct drm_device *dev = work->crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - mutex_lock(&dev->struct_mutex); - if (work == dev_priv->fbc.fbc_work) { - /* Double check that we haven't switched fb without cancelling - * the prior work. - */ - if (work->crtc->primary->fb == work->fb) { - dev_priv->display.enable_fbc(work->crtc); - - dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane; - dev_priv->fbc.fb_id = work->crtc->primary->fb->base.id; - dev_priv->fbc.y = work->crtc->y; - } - - dev_priv->fbc.fbc_work = NULL; - } - mutex_unlock(&dev->struct_mutex); - - kfree(work); -} - -static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv) -{ - if (dev_priv->fbc.fbc_work == NULL) - return; - - DRM_DEBUG_KMS("cancelling pending FBC enable\n"); - - /* Synchronisation is provided by struct_mutex and checking of - * dev_priv->fbc.fbc_work, so we can perform the cancellation - * entirely asynchronously. - */ - if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work)) - /* tasklet was killed before being run, clean up */ - kfree(dev_priv->fbc.fbc_work); - - /* Mark the work as no longer wanted so that if it does - * wake-up (because the work was already running and waiting - * for our mutex), it will discover that is no longer - * necessary to run. - */ - dev_priv->fbc.fbc_work = NULL; -} - -static void intel_enable_fbc(struct drm_crtc *crtc) -{ - struct intel_fbc_work *work; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - if (!dev_priv->display.enable_fbc) - return; - - intel_cancel_fbc_work(dev_priv); - - work = kzalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) { - DRM_ERROR("Failed to allocate FBC work structure\n"); - dev_priv->display.enable_fbc(crtc); - return; - } - - work->crtc = crtc; - work->fb = crtc->primary->fb; - INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn); - - dev_priv->fbc.fbc_work = work; - - /* Delay the actual enabling to let pageflipping cease and the - * display to settle before starting the compression. Note that - * this delay also serves a second purpose: it allows for a - * vblank to pass after disabling the FBC before we attempt - * to modify the control registers. - * - * A more complicated solution would involve tracking vblanks - * following the termination of the page-flipping sequence - * and indeed performing the enable as a co-routine and not - * waiting synchronously upon the vblank. 
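	 *
	 * (As a rough sanity check on the 50ms used below: a frame lasts
	 * at most 50ms for any refresh rate of 20Hz or higher, so the
	 * delay covers at least one full vblank interval on any
	 * realistic mode.)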
- * - * WaFbcWaitForVBlankBeforeEnable:ilk,snb - */ - schedule_delayed_work(&work->work, msecs_to_jiffies(50)); -} - -void intel_disable_fbc(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - intel_cancel_fbc_work(dev_priv); - - if (!dev_priv->display.disable_fbc) - return; - - dev_priv->display.disable_fbc(dev); - dev_priv->fbc.plane = -1; -} - -static bool set_no_fbc_reason(struct drm_i915_private *dev_priv, - enum no_fbc_reason reason) -{ - if (dev_priv->fbc.no_fbc_reason == reason) - return false; - - dev_priv->fbc.no_fbc_reason = reason; - return true; -} - -/** - * intel_update_fbc - enable/disable FBC as needed - * @dev: the drm_device - * - * Set up the framebuffer compression hardware at mode set time. We - * enable it if possible: - * - plane A only (on pre-965) - * - no pixel mulitply/line duplication - * - no alpha buffer discard - * - no dual wide - * - framebuffer <= max_hdisplay in width, max_vdisplay in height - * - * We can't assume that any compression will take place (worst case), - * so the compressed buffer has to be the same size as the uncompressed - * one. It also must reside (along with the line length buffer) in - * stolen memory. - * - * We need to enable/disable FBC on a global basis. - */ -void intel_update_fbc(struct drm_device *dev) +static void gen9_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_crtc *crtc = NULL, *tmp_crtc; - struct intel_crtc *intel_crtc; - struct drm_framebuffer *fb; - struct drm_i915_gem_object *obj; - const struct drm_display_mode *adjusted_mode; - unsigned int max_width, max_height; - - if (!HAS_FBC(dev)) { - set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED); - return; - } - - if (!i915.powersave) { - if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM)) - DRM_DEBUG_KMS("fbc disabled per module param\n"); - return; - } /* - * If FBC is already on, we just have to verify that we can - * keep it that way... - * Need to disable if: - * - more than one pipe is active - * - changing FBC params (stride, fence, mode) - * - new fb is too large to fit in compressed buffer - * - going to an unsupported config (interlace, pixel multiply, etc.) + * WaDisableSDEUnitClockGating:skl + * This seems to be a pre-production w/a. 
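	 * (Applied below as a plain read-modify-write setting
	 * GEN8_SDEUNIT_CLOCK_GATE_DISABLE in GEN8_UCGCTL6.)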
*/ - for_each_crtc(dev, tmp_crtc) { - if (intel_crtc_active(tmp_crtc) && - to_intel_crtc(tmp_crtc)->primary_enabled) { - if (crtc) { - if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES)) - DRM_DEBUG_KMS("more than one pipe active, disabling compression\n"); - goto out_disable; - } - crtc = tmp_crtc; - } - } - - if (!crtc || crtc->primary->fb == NULL) { - if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT)) - DRM_DEBUG_KMS("no output, disabling\n"); - goto out_disable; - } - - intel_crtc = to_intel_crtc(crtc); - fb = crtc->primary->fb; - obj = intel_fb_obj(fb); - adjusted_mode = &intel_crtc->config.adjusted_mode; - - if (i915.enable_fbc < 0) { - if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT)) - DRM_DEBUG_KMS("disabled per chip default\n"); - goto out_disable; - } - if (!i915.enable_fbc) { - if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM)) - DRM_DEBUG_KMS("fbc disabled per module param\n"); - goto out_disable; - } - if ((adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) || - (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)) { - if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE)) - DRM_DEBUG_KMS("mode incompatible with compression, " - "disabling\n"); - goto out_disable; - } - - if (INTEL_INFO(dev)->gen >= 8 || IS_HASWELL(dev)) { - max_width = 4096; - max_height = 4096; - } else if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) { - max_width = 4096; - max_height = 2048; - } else { - max_width = 2048; - max_height = 1536; - } - if (intel_crtc->config.pipe_src_w > max_width || - intel_crtc->config.pipe_src_h > max_height) { - if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE)) - DRM_DEBUG_KMS("mode too large for compression, disabling\n"); - goto out_disable; - } - if ((INTEL_INFO(dev)->gen < 4 || HAS_DDI(dev)) && - intel_crtc->plane != PLANE_A) { - if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE)) - DRM_DEBUG_KMS("plane not A, disabling compression\n"); - goto out_disable; - } + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); - /* The use of a CPU fence is mandatory in order to detect writes - * by the CPU to the scanout and trigger updates to the FBC. - */ - if (obj->tiling_mode != I915_TILING_X || - obj->fence_reg == I915_FENCE_REG_NONE) { - if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED)) - DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n"); - goto out_disable; - } - if (INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev) && - to_intel_plane(crtc->primary)->rotation != BIT(DRM_ROTATE_0)) { - if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE)) - DRM_DEBUG_KMS("Rotation unsupported, disabling\n"); - goto out_disable; - } - - /* If the kernel debugger is active, always disable compression */ - if (in_dbg_master()) - goto out_disable; - - if (i915_gem_stolen_setup_compression(dev, obj->base.size, - drm_format_plane_cpp(fb->pixel_format, 0))) { - if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL)) - DRM_DEBUG_KMS("framebuffer too large, disabling compression\n"); - goto out_disable; - } - - /* If the scanout has not changed, don't modify the FBC settings. - * Note that we make the fundamental assumption that the fb->obj - * cannot be unpinned (and have its GTT offset and fence revoked) - * without first being decoupled from the scanout and FBC disabled. + /* + * WaDisableDgMirrorFixInHalfSliceChicken5:skl + * This is a pre-production w/a. 
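	 * (Note the two register styles in this function:
	 * GEN9_HALF_SLICE_CHICKEN5 takes a plain read-modify-write,
	 * while CACHE_MODE_1 below is a masked register, where
	 * _MASKED_BIT_ENABLE(a) expands to ((a) << 16) | (a) so the
	 * high 16 bits act as a per-bit write-enable mask.)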
*/ - if (dev_priv->fbc.plane == intel_crtc->plane && - dev_priv->fbc.fb_id == fb->base.id && - dev_priv->fbc.y == crtc->y) - return; - - if (intel_fbc_enabled(dev)) { - /* We update FBC along two paths, after changing fb/crtc - * configuration (modeswitching) and after page-flipping - * finishes. For the latter, we know that not only did - * we disable the FBC at the start of the page-flip - * sequence, but also more than one vblank has passed. - * - * For the former case of modeswitching, it is possible - * to switch between two FBC valid configurations - * instantaneously so we do need to disable the FBC - * before we can modify its control registers. We also - * have to wait for the next vblank for that to take - * effect. However, since we delay enabling FBC we can - * assume that a vblank has passed since disabling and - * that we can safely alter the registers in the deferred - * callback. - * - * In the scenario that we go from a valid to invalid - * and then back to valid FBC configuration we have - * no strict enforcement that a vblank occurred since - * disabling the FBC. However, along all current pipe - * disabling paths we do need to wait for a vblank at - * some point. And we wait before enabling FBC anyway. - */ - DRM_DEBUG_KMS("disabling active FBC for update\n"); - intel_disable_fbc(dev); - } - - intel_enable_fbc(crtc); - dev_priv->fbc.no_fbc_reason = FBC_OK; - return; + I915_WRITE(GEN9_HALF_SLICE_CHICKEN5, + I915_READ(GEN9_HALF_SLICE_CHICKEN5) & + ~GEN9_DG_MIRROR_FIX_ENABLE); -out_disable: - /* Multiple disables should be harmless */ - if (intel_fbc_enabled(dev)) { - DRM_DEBUG_KMS("unsupported config, disabling FBC\n"); - intel_disable_fbc(dev); - } - i915_gem_stolen_cleanup_compression(dev); + /* Wa4x4STCOptimizationDisable:skl */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); } + static void i915_pineview_get_mem_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1310,6 +727,7 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, int *prec_mult, int *drain_latency) { + struct drm_device *dev = crtc->dev; int entries; int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock; @@ -1320,8 +738,12 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, return false; entries = DIV_ROUND_UP(clock, 1000) * pixel_size; - *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : - DRAIN_LATENCY_PRECISION_32; + if (IS_CHERRYVIEW(dev)) + *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_32 : + DRAIN_LATENCY_PRECISION_16; + else + *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : + DRAIN_LATENCY_PRECISION_32; *drain_latency = (64 * (*prec_mult) * 4) / entries; if (*drain_latency > DRAIN_LATENCY_MASK) @@ -1340,15 +762,18 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, static void vlv_update_drain_latency(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pixel_size; int drain_latency; enum pipe pipe = intel_crtc->pipe; int plane_prec, prec_mult, plane_dl; + const int high_precision = IS_CHERRYVIEW(dev) ? 
+ DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64; - plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 | - DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 | + plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_HIGH | + DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_HIGH | (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT)); if (!intel_crtc_active(crtc)) { @@ -1359,9 +784,9 @@ static void vlv_update_drain_latency(struct drm_crtc *crtc) /* Primary plane Drain Latency */ pixel_size = crtc->primary->fb->bits_per_pixel / 8; /* BPP */ if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { - plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_PLANE_PRECISION_64 : - DDL_PLANE_PRECISION_32; + plane_prec = (prec_mult == high_precision) ? + DDL_PLANE_PRECISION_HIGH : + DDL_PLANE_PRECISION_LOW; plane_dl |= plane_prec | drain_latency; } @@ -1373,9 +798,9 @@ static void vlv_update_drain_latency(struct drm_crtc *crtc) /* Program cursor DL only if it is enabled */ if (intel_crtc->cursor_base && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { - plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_CURSOR_PRECISION_64 : - DDL_CURSOR_PRECISION_32; + plane_prec = (prec_mult == high_precision) ? + DDL_CURSOR_PRECISION_HIGH : + DDL_CURSOR_PRECISION_LOW; plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT); } @@ -1543,15 +968,17 @@ static void valleyview_update_sprite_wm(struct drm_plane *plane, int plane_prec; int sprite_dl; int prec_mult; + const int high_precision = IS_CHERRYVIEW(dev) ? + DRAIN_LATENCY_PRECISION_32 : DRAIN_LATENCY_PRECISION_64; - sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) | + sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_HIGH(sprite) | (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite))); if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { - plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_SPRITE_PRECISION_64(sprite) : - DDL_SPRITE_PRECISION_32(sprite); + plane_prec = (prec_mult == high_precision) ? 
+ DDL_SPRITE_PRECISION_HIGH(sprite) : + DDL_SPRITE_PRECISION_LOW(sprite); sprite_dl |= plane_prec | (drain_latency << DDL_SPRITE_SHIFT(sprite)); } @@ -1915,6 +1342,14 @@ static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2; } +struct skl_pipe_wm_parameters { + bool active; + uint32_t pipe_htotal; + uint32_t pixel_rate; /* in KHz */ + struct intel_plane_wm_parameters plane[I915_MAX_PLANES]; + struct intel_plane_wm_parameters cursor; +}; + struct ilk_pipe_wm_parameters { bool active; uint32_t pipe_htotal; @@ -2226,21 +1661,92 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc) PIPE_WM_LINETIME_TIME(linetime); } -static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5]) +static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) { struct drm_i915_private *dev_priv = dev->dev_private; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - uint64_t sskpd = I915_READ64(MCH_SSKPD); + if (IS_GEN9(dev)) { + uint32_t val; + int ret, i; + int level, max_level = ilk_wm_max_level(dev); - wm[0] = (sskpd >> 56) & 0xFF; - if (wm[0] == 0) - wm[0] = sskpd & 0xF; - wm[1] = (sskpd >> 4) & 0xFF; - wm[2] = (sskpd >> 12) & 0xFF; - wm[3] = (sskpd >> 20) & 0x1FF; - wm[4] = (sskpd >> 32) & 0x1FF; - } else if (INTEL_INFO(dev)->gen >= 6) { + /* read the first set of memory latencies[0:3] */ + val = 0; /* data0 to be programmed to 0 for first set */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_read(dev_priv, + GEN9_PCODE_READ_MEM_LATENCY, + &val); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("SKL Mailbox read error = %d\n", ret); + return; + } + + wm[0] = val & GEN9_MEM_LATENCY_LEVEL_MASK; + wm[1] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[2] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[3] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + + /* read the second set of memory latencies[4:7] */ + val = 1; /* data0 to be programmed to 1 for second set */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_read(dev_priv, + GEN9_PCODE_READ_MEM_LATENCY, + &val); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("SKL Mailbox read error = %d\n", ret); + return; + } + + wm[4] = val & GEN9_MEM_LATENCY_LEVEL_MASK; + wm[5] = (val >> GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[6] = (val >> GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + wm[7] = (val >> GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT) & + GEN9_MEM_LATENCY_LEVEL_MASK; + + /* + * punit doesn't take into account the read latency so we need + * to add 2us to the various latency levels we retrieve from + * the punit. + * - W0 is a bit special in that it's the only level that + * can't be disabled if we want to have display working, so + * we always add 2us there. + * - For levels >=1, punit returns 0us latency when they are + * disabled, so we respect that and don't add 2us then + * + * Additionally, if a level n (n > 1) has a 0us latency, all + * levels m (m >= n) need to be disabled. We make sure to + * sanitize the values out of the punit to satisfy this + * requirement. 
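	 *
	 * For example, if the punit reported {16, 32, 0, 64, ...} for
	 * WM0..WM3, the sanitized result would be {18, 34, 0, 0, ...}:
	 * 2us is added to each non-zero level, and every level from the
	 * first zero (WM2 here) upwards is forced to 0, i.e. disabled.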
+ */ + wm[0] += 2; + for (level = 1; level <= max_level; level++) + if (wm[level] != 0) + wm[level] += 2; + else { + for (i = level + 1; i <= max_level; i++) + wm[i] = 0; + + break; + } + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + uint64_t sskpd = I915_READ64(MCH_SSKPD); + + wm[0] = (sskpd >> 56) & 0xFF; + if (wm[0] == 0) + wm[0] = sskpd & 0xF; + wm[1] = (sskpd >> 4) & 0xFF; + wm[2] = (sskpd >> 12) & 0xFF; + wm[3] = (sskpd >> 20) & 0x1FF; + wm[4] = (sskpd >> 32) & 0x1FF; + } else if (INTEL_INFO(dev)->gen >= 6) { uint32_t sskpd = I915_READ(MCH_SSKPD); wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; @@ -2278,7 +1784,9 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) int ilk_wm_max_level(const struct drm_device *dev) { /* how many WM levels are we expecting */ - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + if (IS_GEN9(dev)) + return 7; + else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) return 4; else if (INTEL_INFO(dev)->gen >= 6) return 3; @@ -2288,7 +1796,7 @@ int ilk_wm_max_level(const struct drm_device *dev) static void intel_print_wm_latency(struct drm_device *dev, const char *name, - const uint16_t wm[5]) + const uint16_t wm[8]) { int level, max_level = ilk_wm_max_level(dev); @@ -2301,8 +1809,13 @@ static void intel_print_wm_latency(struct drm_device *dev, continue; } - /* WM1+ latency values in 0.5us units */ - if (level > 0) + /* + * - latencies are in us on gen9. + * - before then, WM1+ latency values are in 0.5us units + */ + if (IS_GEN9(dev)) + latency *= 10; + else if (level > 0) latency *= 5; DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n", @@ -2370,6 +1883,14 @@ static void ilk_setup_wm_latency(struct drm_device *dev) snb_wm_latency_quirk(dev); } +static void skl_setup_wm_latency(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + intel_read_wm_latency(dev, dev_priv->wm.skl_latency); + intel_print_wm_latency(dev, "Gen9 Plane", dev_priv->wm.skl_latency); +} + static void ilk_compute_wm_parameters(struct drm_crtc *crtc, struct ilk_pipe_wm_parameters *p) { @@ -2860,4337 +2381,3938 @@ static bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } -static void ilk_update_wm(struct drm_crtc *crtc) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct ilk_wm_maximums max; - struct ilk_pipe_wm_parameters params = {}; - struct ilk_wm_values results = {}; - enum intel_ddb_partitioning partitioning; - struct intel_pipe_wm pipe_wm = {}; - struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; - struct intel_wm_config config = {}; +/* + * On gen9, we need to allocate Display Data Buffer (DDB) portions to the + * different active planes. 
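 *
 * As a rough worked example (constants from the code below, the mode is
 * purely illustrative): with two active pipes each pipe gets
 * (896 - 4) / 2 = 446 blocks, 8 of which are reserved for the cursor.
 * The remaining 438 blocks are split between the planes in proportion
 * to their relative data rates (width * height * bytes per pixel), so a
 * 1920x1080 primary plus a 1920x540 sprite, both at 4 Bpp, would get
 * 292 and 146 blocks respectively.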
+ */ - ilk_compute_wm_parameters(crtc, ¶ms); +#define SKL_DDB_SIZE 896 /* in blocks */ - intel_compute_pipe_wm(crtc, ¶ms, &pipe_wm); +static void +skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, + struct drm_crtc *for_crtc, + const struct intel_wm_config *config, + const struct skl_pipe_wm_parameters *params, + struct skl_ddb_entry *alloc /* out */) +{ + struct drm_crtc *crtc; + unsigned int pipe_size, ddb_size; + int nth_active_pipe; - if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm))) + if (!params->active) { + alloc->start = 0; + alloc->end = 0; return; + } - intel_crtc->wm.active = pipe_wm; + ddb_size = SKL_DDB_SIZE; - ilk_compute_wm_config(dev, &config); + ddb_size -= 4; /* 4 blocks for bypass path allocation */ - ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); - ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); + nth_active_pipe = 0; + for_each_crtc(dev, crtc) { + if (!intel_crtc_active(crtc)) + continue; - /* 5/6 split only in single pipe config on IVB+ */ - if (INTEL_INFO(dev)->gen >= 7 && - config.num_pipes_active == 1 && config.sprites_enabled) { - ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); - ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); + if (crtc == for_crtc) + break; - best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); - } else { - best_lp_wm = &lp_wm_1_2; + nth_active_pipe++; } - partitioning = (best_lp_wm == &lp_wm_1_2) ? - INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; + pipe_size = ddb_size / config->num_pipes_active; + alloc->start = nth_active_pipe * ddb_size / config->num_pipes_active; + alloc->end = alloc->start + pipe_size; +} - ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); +static unsigned int skl_cursor_allocation(const struct intel_wm_config *config) +{ + if (config->num_pipes_active == 1) + return 32; - ilk_write_wm_values(dev_priv, &results); + return 8; } -static void -ilk_update_sprite_wm(struct drm_plane *plane, - struct drm_crtc *crtc, - uint32_t sprite_width, uint32_t sprite_height, - int pixel_size, bool enabled, bool scaled) +static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) { - struct drm_device *dev = plane->dev; - struct intel_plane *intel_plane = to_intel_plane(plane); + entry->start = reg & 0x3ff; + entry->end = (reg >> 16) & 0x3ff; + if (entry->end) + entry->end += 1; +} - intel_plane->wm.enabled = enabled; - intel_plane->wm.scaled = scaled; - intel_plane->wm.horiz_pixels = sprite_width; - intel_plane->wm.vert_pixels = sprite_width; - intel_plane->wm.bytes_per_pixel = pixel_size; +void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb /* out */) +{ + struct drm_device *dev = dev_priv->dev; + enum pipe pipe; + int plane; + u32 val; - /* - * IVB workaround: must disable low power watermarks for at least - * one frame before enabling scaling. LP watermarks can be re-enabled - * when scaling is disabled. 
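	 *
	 * (ilk_disable_lp_wm() below returns true only if an LP watermark
	 * was actually enabled and had to be turned off, so the vblank
	 * wait is skipped when there is nothing to flush.)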
- * - * WaCxSRDisabledForSpriteScaling:ivb - */ - if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev)) - intel_wait_for_vblank(dev, intel_plane->pipe); + for_each_pipe(dev_priv, pipe) { + for_each_plane(pipe, plane) { + val = I915_READ(PLANE_BUF_CFG(pipe, plane)); + skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane], + val); + } - ilk_update_wm(crtc); + val = I915_READ(CUR_BUF_CFG(pipe)); + skl_ddb_entry_init_from_hw(&ddb->cursor[pipe], val); + } } -static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) +static unsigned int +skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p) +{ + return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel; +} + +/* + * We don't overflow 32 bits. Worst case is 3 planes enabled, each fetching + * a 8192x4096@32bpp framebuffer: + * 3 * 4096 * 8192 * 4 < 2^32 + */ +static unsigned int +skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc, + const struct skl_pipe_wm_parameters *params) +{ + unsigned int total_data_rate = 0; + int plane; + + for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { + const struct intel_plane_wm_parameters *p; + + p = ¶ms->plane[plane]; + if (!p->enabled) + continue; + + total_data_rate += skl_plane_relative_data_rate(p); + } + + return total_data_rate; +} + +static void +skl_allocate_pipe_ddb(struct drm_crtc *crtc, + const struct intel_wm_config *config, + const struct skl_pipe_wm_parameters *params, + struct skl_ddb_allocation *ddb /* out */) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct ilk_wm_values *hw = &dev_priv->wm.hw; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_pipe_wm *active = &intel_crtc->wm.active; enum pipe pipe = intel_crtc->pipe; - static const unsigned int wm0_pipe_reg[] = { - [PIPE_A] = WM0_PIPEA_ILK, - [PIPE_B] = WM0_PIPEB_ILK, - [PIPE_C] = WM0_PIPEC_IVB, - }; + struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; + uint16_t alloc_size, start, cursor_blocks; + unsigned int total_data_rate; + int plane; + + skl_ddb_get_pipe_allocation_limits(dev, crtc, config, params, alloc); + alloc_size = skl_ddb_entry_size(alloc); + if (alloc_size == 0) { + memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); + memset(&ddb->cursor[pipe], 0, sizeof(ddb->cursor[pipe])); + return; + } - hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); + cursor_blocks = skl_cursor_allocation(config); + ddb->cursor[pipe].start = alloc->end - cursor_blocks; + ddb->cursor[pipe].end = alloc->end; - active->pipe_enabled = intel_crtc_active(crtc); + alloc_size -= cursor_blocks; + alloc->end -= cursor_blocks; - if (active->pipe_enabled) { - u32 tmp = hw->wm_pipe[pipe]; + /* + * Each active plane get a portion of the remaining space, in + * proportion to the amount of data they need to fetch from memory. + * + * FIXME: we may not allocate every single block here. + */ + total_data_rate = skl_get_total_relative_data_rate(intel_crtc, params); - /* - * For active pipes LP0 watermark is marked as - * enabled, and LP1+ watermaks as disabled since - * we can't really reverse compute them in case - * multiple pipes are active. 
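	 * (The LP1+ registers hold values already merged across all the
	 * pipes, so a single pipe's contribution cannot be recovered
	 * from them here.)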
- */ - active->wm[0].enable = true; - active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; - active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; - active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; - active->linetime = hw->wm_linetime[pipe]; - } else { - int level, max_level = ilk_wm_max_level(dev); + start = alloc->start; + for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { + const struct intel_plane_wm_parameters *p; + unsigned int data_rate; + uint16_t plane_blocks; + + p = ¶ms->plane[plane]; + if (!p->enabled) + continue; + + data_rate = skl_plane_relative_data_rate(p); /* - * For inactive pipes, all watermark levels - * should be marked as enabled but zeroed, - * which is what we'd compute them to. + * promote the expression to 64 bits to avoid overflowing, the + * result is < available as data_rate / total_data_rate < 1 */ - for (level = 0; level <= max_level; level++) - active->wm[level].enable = true; + plane_blocks = div_u64((uint64_t)alloc_size * data_rate, + total_data_rate); + + ddb->plane[pipe][plane].start = start; + ddb->plane[pipe][plane].end = start + plane_blocks; + + start += plane_blocks; } + } -void ilk_wm_get_hw_state(struct drm_device *dev) +static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_config *config) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct ilk_wm_values *hw = &dev_priv->wm.hw; - struct drm_crtc *crtc; - - for_each_crtc(dev, crtc) - ilk_pipe_wm_get_hw_state(crtc); + /* TODO: Take into account the scalers once we support them */ + return config->adjusted_mode.crtc_clock; +} - hw->wm_lp[0] = I915_READ(WM1_LP_ILK); - hw->wm_lp[1] = I915_READ(WM2_LP_ILK); - hw->wm_lp[2] = I915_READ(WM3_LP_ILK); +/* + * The max latency should be 257 (max the punit can code is 255 and we add 2us + * for the read latency) and bytes_per_pixel should always be <= 8, so that + * should allow pixel_rate up to ~2 GHz which seems sufficient since max + * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. +*/ +static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel, + uint32_t latency) +{ + uint32_t wm_intermediate_val, ret; - hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); - if (INTEL_INFO(dev)->gen >= 7) { - hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); - hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); - } + if (latency == 0) + return UINT_MAX; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? - INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; - else if (IS_IVYBRIDGE(dev)) - hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? - INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; + wm_intermediate_val = latency * pixel_rate * bytes_per_pixel; + ret = DIV_ROUND_UP(wm_intermediate_val, 1000); - hw->enable_fbc_wm = - !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); + return ret; } -/** - * intel_update_watermarks - update FIFO watermark values based on current modes - * - * Calculate watermark values for the various WM regs based on current mode - * and plane configuration. - * - * There are several cases to deal with here: - * - normal (i.e. 
non-self-refresh) - * - self-refresh (SR) mode - * - lines are large relative to FIFO size (buffer can hold up to 2) - * - lines are small relative to FIFO size (buffer can hold more than 2 - * lines), so need to account for TLB latency - * - * The normal calculation is: - * watermark = dotclock * bytes per pixel * latency - * where latency is platform & configuration dependent (we assume pessimal - * values here). - * - * The SR calculation is: - * watermark = (trunc(latency/line time)+1) * surface width * - * bytes per pixel - * where - * line time = htotal / dotclock - * surface width = hdisplay for normal plane and 64 for cursor - * and latency is assumed to be high, as above. - * - * The final value programmed to the register should always be rounded up, - * and include an extra 2 entries to account for clock crossings. - * - * We don't use the sprite, so we can ignore that. And on Crestline we have - * to set the non-SR watermarks to 8. - */ -void intel_update_watermarks(struct drm_crtc *crtc) +static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, + uint32_t horiz_pixels, uint8_t bytes_per_pixel, + uint32_t latency) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + uint32_t ret, plane_bytes_per_line, wm_intermediate_val; - if (dev_priv->display.update_wm) - dev_priv->display.update_wm(crtc); -} + if (latency == 0) + return UINT_MAX; -void intel_update_sprite_watermarks(struct drm_plane *plane, - struct drm_crtc *crtc, - uint32_t sprite_width, - uint32_t sprite_height, - int pixel_size, - bool enabled, bool scaled) -{ - struct drm_i915_private *dev_priv = plane->dev->dev_private; + plane_bytes_per_line = horiz_pixels * bytes_per_pixel; + wm_intermediate_val = latency * pixel_rate; + ret = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000) * + plane_bytes_per_line; - if (dev_priv->display.update_sprite_wm) - dev_priv->display.update_sprite_wm(plane, crtc, - sprite_width, sprite_height, - pixel_size, enabled, scaled); + return ret; } -static struct drm_i915_gem_object * -intel_alloc_context_page(struct drm_device *dev) +static bool skl_ddb_allocation_changed(const struct skl_ddb_allocation *new_ddb, + const struct intel_crtc *intel_crtc) { - struct drm_i915_gem_object *ctx; - int ret; + struct drm_device *dev = intel_crtc->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + const struct skl_ddb_allocation *cur_ddb = &dev_priv->wm.skl_hw.ddb; + enum pipe pipe = intel_crtc->pipe; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + if (memcmp(new_ddb->plane[pipe], cur_ddb->plane[pipe], + sizeof(new_ddb->plane[pipe]))) + return true; - ctx = i915_gem_alloc_object(dev, 4096); - if (!ctx) { - DRM_DEBUG("failed to alloc power context, RC6 disabled\n"); - return NULL; - } + if (memcmp(&new_ddb->cursor[pipe], &cur_ddb->cursor[pipe], + sizeof(new_ddb->cursor[pipe]))) + return true; - ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0); - if (ret) { - DRM_ERROR("failed to pin power context: %d\n", ret); - goto err_unref; + return false; +} + +static void skl_compute_wm_global_parameters(struct drm_device *dev, + struct intel_wm_config *config) +{ + struct drm_crtc *crtc; + struct drm_plane *plane; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) + config->num_pipes_active += intel_crtc_active(crtc); + + /* FIXME: I don't think we need those two global parameters on SKL */ + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { + struct intel_plane *intel_plane = to_intel_plane(plane); + + config->sprites_enabled |= 
intel_plane->wm.enabled; + config->sprites_scaled |= intel_plane->wm.scaled; } +} - ret = i915_gem_object_set_to_gtt_domain(ctx, 1); - if (ret) { - DRM_ERROR("failed to set-domain on power context: %d\n", ret); - goto err_unpin; +static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, + struct skl_pipe_wm_parameters *p) +{ + struct drm_device *dev = crtc->dev; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; + struct drm_plane *plane; + int i = 1; /* Index for sprite planes start */ + + p->active = intel_crtc_active(crtc); + if (p->active) { + p->pipe_htotal = intel_crtc->config.adjusted_mode.crtc_htotal; + p->pixel_rate = skl_pipe_pixel_rate(&intel_crtc->config); + + /* + * For now, assume primary and cursor planes are always enabled. + */ + p->plane[0].enabled = true; + p->plane[0].bytes_per_pixel = + crtc->primary->fb->bits_per_pixel / 8; + p->plane[0].horiz_pixels = intel_crtc->config.pipe_src_w; + p->plane[0].vert_pixels = intel_crtc->config.pipe_src_h; + + p->cursor.enabled = true; + p->cursor.bytes_per_pixel = 4; + p->cursor.horiz_pixels = intel_crtc->cursor_width ? + intel_crtc->cursor_width : 64; } - return ctx; + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { + struct intel_plane *intel_plane = to_intel_plane(plane); -err_unpin: - i915_gem_object_ggtt_unpin(ctx); -err_unref: - drm_gem_object_unreference(&ctx->base); - return NULL; + if (intel_plane->pipe == pipe && + plane->type == DRM_PLANE_TYPE_OVERLAY) + p->plane[i++] = intel_plane->wm; + } } -/** - * Lock protecting IPS related data structures - */ -DEFINE_SPINLOCK(mchdev_lock); +static bool skl_compute_plane_wm(struct skl_pipe_wm_parameters *p, + struct intel_plane_wm_parameters *p_params, + uint16_t ddb_allocation, + uint32_t mem_value, + uint16_t *out_blocks, /* out */ + uint8_t *out_lines /* out */) +{ + uint32_t method1, method2, plane_bytes_per_line, res_blocks, res_lines; + uint32_t result_bytes; -/* Global for IPS driver to get at the current i915 device. Protected by - * mchdev_lock. 
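 * (The IPS driver reaches into i915 through this pointer from outside
 * the module, see the intel_ips.h include at the top of this file,
 * which is why a plain global guarded by mchdev_lock is used.)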
*/ -static struct drm_i915_private *i915_mch_dev; + if (mem_value == 0 || !p->active || !p_params->enabled) + return false; -bool ironlake_set_drps(struct drm_device *dev, u8 val) + method1 = skl_wm_method1(p->pixel_rate, + p_params->bytes_per_pixel, + mem_value); + method2 = skl_wm_method2(p->pixel_rate, + p->pipe_htotal, + p_params->horiz_pixels, + p_params->bytes_per_pixel, + mem_value); + + plane_bytes_per_line = p_params->horiz_pixels * + p_params->bytes_per_pixel; + + /* For now xtile and linear */ + if (((ddb_allocation * 512) / plane_bytes_per_line) >= 1) + result_bytes = min(method1, method2); + else + result_bytes = method1; + + res_blocks = DIV_ROUND_UP(result_bytes, 512) + 1; + res_lines = DIV_ROUND_UP(result_bytes, plane_bytes_per_line); + + if (res_blocks > ddb_allocation || res_lines > 31) + return false; + + *out_blocks = res_blocks; + *out_lines = res_lines; + + return true; +} + +static void skl_compute_wm_level(const struct drm_i915_private *dev_priv, + struct skl_ddb_allocation *ddb, + struct skl_pipe_wm_parameters *p, + enum pipe pipe, + int level, + int num_planes, + struct skl_wm_level *result) { - struct drm_i915_private *dev_priv = dev->dev_private; - u16 rgvswctl; + uint16_t latency = dev_priv->wm.skl_latency[level]; + uint16_t ddb_blocks; + int i; - assert_spin_locked(&mchdev_lock); + for (i = 0; i < num_planes; i++) { + ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][i]); - rgvswctl = I915_READ16(MEMSWCTL); - if (rgvswctl & MEMCTL_CMD_STS) { - DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ + result->plane_en[i] = skl_compute_plane_wm(p, &p->plane[i], + ddb_blocks, + latency, + &result->plane_res_b[i], + &result->plane_res_l[i]); } - rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | - (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; - I915_WRITE16(MEMSWCTL, rgvswctl); - POSTING_READ16(MEMSWCTL); + ddb_blocks = skl_ddb_entry_size(&ddb->cursor[pipe]); + result->cursor_en = skl_compute_plane_wm(p, &p->cursor, ddb_blocks, + latency, &result->cursor_res_b, + &result->cursor_res_l); +} - rgvswctl |= MEMCTL_CMD_STS; - I915_WRITE16(MEMSWCTL, rgvswctl); +static uint32_t +skl_compute_linetime_wm(struct drm_crtc *crtc, struct skl_pipe_wm_parameters *p) +{ + if (!intel_crtc_active(crtc)) + return 0; + + return DIV_ROUND_UP(8 * p->pipe_htotal * 1000, p->pixel_rate); - return true; } -static void ironlake_enable_drps(struct drm_device *dev) +static void skl_compute_transition_wm(struct drm_crtc *crtc, + struct skl_pipe_wm_parameters *params, + struct skl_wm_level *trans_wm /* out */) { - struct drm_i915_private *dev_priv = dev->dev_private; - u32 rgvmodectl = I915_READ(MEMMODECTL); - u8 fmax, fmin, fstart, vstart; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int i; - spin_lock_irq(&mchdev_lock); + if (!params->active) + return; - /* Enable temp reporting */ - I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); - I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); + /* Until we know more, just disable transition WMs */ + for (i = 0; i < intel_num_planes(intel_crtc); i++) + trans_wm->plane_en[i] = false; + trans_wm->cursor_en = false; +} - /* 100ms RC evaluation intervals */ - I915_WRITE(RCUPEI, 100000); - I915_WRITE(RCDNEI, 100000); +static void skl_compute_pipe_wm(struct drm_crtc *crtc, + struct skl_ddb_allocation *ddb, + struct skl_pipe_wm_parameters *params, + struct skl_pipe_wm *pipe_wm) +{ + struct drm_device *dev = crtc->dev; + const struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc 
*intel_crtc = to_intel_crtc(crtc); + int level, max_level = ilk_wm_max_level(dev); - /* Set max/min thresholds to 90ms and 80ms respectively */ - I915_WRITE(RCBMAXAVG, 90000); - I915_WRITE(RCBMINAVG, 80000); + for (level = 0; level <= max_level; level++) { + skl_compute_wm_level(dev_priv, ddb, params, intel_crtc->pipe, + level, intel_num_planes(intel_crtc), + &pipe_wm->wm[level]); + } + pipe_wm->linetime = skl_compute_linetime_wm(crtc, params); - I915_WRITE(MEMIHYST, 1); + skl_compute_transition_wm(crtc, params, &pipe_wm->trans_wm); +} - /* Set up min, max, and cur for interrupt handling */ - fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; - fmin = (rgvmodectl & MEMMODE_FMIN_MASK); - fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> - MEMMODE_FSTART_SHIFT; +static void skl_compute_wm_results(struct drm_device *dev, + struct skl_pipe_wm_parameters *p, + struct skl_pipe_wm *p_wm, + struct skl_wm_values *r, + struct intel_crtc *intel_crtc) +{ + int level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = intel_crtc->pipe; + uint32_t temp; + int i; - vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = 0; - dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ - dev_priv->ips.fstart = fstart; + temp |= p_wm->wm[level].plane_res_l[i] << + PLANE_WM_LINES_SHIFT; + temp |= p_wm->wm[level].plane_res_b[i]; + if (p_wm->wm[level].plane_en[i]) + temp |= PLANE_WM_EN; - dev_priv->ips.max_delay = fstart; - dev_priv->ips.min_delay = fmin; - dev_priv->ips.cur_delay = fstart; + r->plane[pipe][i][level] = temp; + } - DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", - fmax, fmin, fstart); + temp = 0; - I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); + temp |= p_wm->wm[level].cursor_res_l << PLANE_WM_LINES_SHIFT; + temp |= p_wm->wm[level].cursor_res_b; - /* - * Interrupts will be enabled in ironlake_irq_postinstall - */ + if (p_wm->wm[level].cursor_en) + temp |= PLANE_WM_EN; - I915_WRITE(VIDSTART, vstart); - POSTING_READ(VIDSTART); + r->cursor[pipe][level] = temp; - rgvmodectl |= MEMMODE_SWMODE_EN; - I915_WRITE(MEMMODECTL, rgvmodectl); + } - if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) - DRM_ERROR("stuck trying to change perf mode\n"); - mdelay(1); + /* transition WMs */ + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = 0; + temp |= p_wm->trans_wm.plane_res_l[i] << PLANE_WM_LINES_SHIFT; + temp |= p_wm->trans_wm.plane_res_b[i]; + if (p_wm->trans_wm.plane_en[i]) + temp |= PLANE_WM_EN; - ironlake_set_drps(dev, fstart); + r->plane_trans[pipe][i] = temp; + } - dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + - I915_READ(0x112e0); - dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); - dev_priv->ips.last_count2 = I915_READ(0x112f4); - dev_priv->ips.last_time2 = ktime_get_raw_ns(); + temp = 0; + temp |= p_wm->trans_wm.cursor_res_l << PLANE_WM_LINES_SHIFT; + temp |= p_wm->trans_wm.cursor_res_b; + if (p_wm->trans_wm.cursor_en) + temp |= PLANE_WM_EN; - spin_unlock_irq(&mchdev_lock); + r->cursor_trans[pipe] = temp; + + r->wm_linetime[pipe] = p_wm->linetime; } -static void ironlake_disable_drps(struct drm_device *dev) +static void skl_ddb_entry_write(struct drm_i915_private *dev_priv, uint32_t reg, + const struct skl_ddb_entry *entry) { - struct drm_i915_private *dev_priv = dev->dev_private; - u16 rgvswctl; + if (entry->end) + I915_WRITE(reg, (entry->end - 1) << 16 | 
entry->start); + else + I915_WRITE(reg, 0); +} - spin_lock_irq(&mchdev_lock); +static void skl_write_wm_values(struct drm_i915_private *dev_priv, + const struct skl_wm_values *new) +{ + struct drm_device *dev = dev_priv->dev; + struct intel_crtc *crtc; - rgvswctl = I915_READ16(MEMSWCTL); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { + int i, level, max_level = ilk_wm_max_level(dev); + enum pipe pipe = crtc->pipe; - /* Ack interrupts, disable EFC interrupt */ - I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); - I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); - I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); - I915_WRITE(DEIIR, DE_PCU_EVENT); - I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); + if (!new->dirty[pipe]) + continue; - /* Go back to the starting frequency */ - ironlake_set_drps(dev, dev_priv->ips.fstart); - mdelay(1); - rgvswctl |= MEMCTL_CMD_STS; - I915_WRITE(MEMSWCTL, rgvswctl); - mdelay(1); + I915_WRITE(PIPE_WM_LINETIME(pipe), new->wm_linetime[pipe]); - spin_unlock_irq(&mchdev_lock); + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(crtc); i++) + I915_WRITE(PLANE_WM(pipe, i, level), + new->plane[pipe][i][level]); + I915_WRITE(CUR_WM(pipe, level), + new->cursor[pipe][level]); + } + for (i = 0; i < intel_num_planes(crtc); i++) + I915_WRITE(PLANE_WM_TRANS(pipe, i), + new->plane_trans[pipe][i]); + I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); + + for (i = 0; i < intel_num_planes(crtc); i++) + skl_ddb_entry_write(dev_priv, + PLANE_BUF_CFG(pipe, i), + &new->ddb.plane[pipe][i]); + + skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), + &new->ddb.cursor[pipe]); + } } -/* There's a funny hw issue where the hw returns all 0 when reading from - * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value - * ourselves, instead of doing a rmw cycle (which might result in us clearing - * all limits and the gpu stuck at whatever frequency it is at atm). +/* + * When setting up a new DDB allocation arrangement, we need to correctly + * sequence the times at which the new allocations for the pipes are taken into + * account or we'll have pipes fetching from space previously allocated to + * another pipe. + * + * Roughly the sequence looks like: + * 1. re-allocate the pipe(s) with the allocation being reduced and not + * overlapping with a previous light-up pipe (another way to put it is: + * pipes with their new allocation strickly included into their old ones). + * 2. re-allocate the other pipes that get their allocation reduced + * 3. allocate the pipes having their allocation increased + * + * Steps 1. and 2. are here to take care of the following case: + * - Initially DDB looks like this: + * | B | C | + * - enable pipe A. + * - pipe B has a reduced DDB allocation that overlaps with the old pipe C + * allocation + * | A | B | C | + * + * We need to sequence the re-allocation: C, B, A (and not B, C, A). */ -static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val) + +static void +skl_wm_flush_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, int pass) { - u32 limits; + struct drm_device *dev = dev_priv->dev; + int plane; - /* Only set the down limit when we've reached the lowest level to avoid - * getting more interrupts, otherwise leave this clear. 
This prevents a - * race in the hw when coming out of rc6: There's a tiny window where - * the hw runs at the minimal clock before selecting the desired - * frequency, if the down threshold expires in that window we will not - * receive a down interrupt. */ - limits = dev_priv->rps.max_freq_softlimit << 24; - if (val <= dev_priv->rps.min_freq_softlimit) - limits |= dev_priv->rps.min_freq_softlimit << 16; + DRM_DEBUG_KMS("flush pipe %c (pass %d)\n", pipe_name(pipe), pass); - return limits; + for_each_plane(pipe, plane) { + I915_WRITE(PLANE_SURF(pipe, plane), + I915_READ(PLANE_SURF(pipe, plane))); + } + I915_WRITE(CURBASE(pipe), I915_READ(CURBASE(pipe))); } -static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) +static bool +skl_ddb_allocation_included(const struct skl_ddb_allocation *old, + const struct skl_ddb_allocation *new, + enum pipe pipe) { - int new_power; + uint16_t old_size, new_size; - new_power = dev_priv->rps.power; - switch (dev_priv->rps.power) { - case LOW_POWER: - if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) - new_power = BETWEEN; - break; + old_size = skl_ddb_entry_size(&old->pipe[pipe]); + new_size = skl_ddb_entry_size(&new->pipe[pipe]); - case BETWEEN: - if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) - new_power = LOW_POWER; - else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) - new_power = HIGH_POWER; - break; + return old_size != new_size && + new->pipe[pipe].start >= old->pipe[pipe].start && + new->pipe[pipe].end <= old->pipe[pipe].end; +} - case HIGH_POWER: - if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) - new_power = BETWEEN; - break; - } - /* Max/min bins are special */ - if (val == dev_priv->rps.min_freq_softlimit) - new_power = LOW_POWER; - if (val == dev_priv->rps.max_freq_softlimit) - new_power = HIGH_POWER; - if (new_power == dev_priv->rps.power) - return; +static void skl_flush_wm_values(struct drm_i915_private *dev_priv, + struct skl_wm_values *new_values) +{ + struct drm_device *dev = dev_priv->dev; + struct skl_ddb_allocation *cur_ddb, *new_ddb; + bool reallocated[I915_MAX_PIPES] = {false, false, false}; + struct intel_crtc *crtc; + enum pipe pipe; - /* Note the units here are not exactly 1us, but 1280ns. */ - switch (new_power) { - case LOW_POWER: - /* Upclock if more than 95% busy over 16ms */ - I915_WRITE(GEN6_RP_UP_EI, 12500); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800); + new_ddb = &new_values->ddb; + cur_ddb = &dev_priv->wm.skl_hw.ddb; - /* Downclock if less than 85% busy over 32ms */ - I915_WRITE(GEN6_RP_DOWN_EI, 25000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250); + /* + * First pass: flush the pipes with the new allocation contained into + * the old space. + * + * We'll wait for the vblank on those pipes to ensure we can safely + * re-allocate the freed space without this pipe fetching from it. 
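	 *
	 * (A pipe qualifies for this first pass only when
	 * skl_ddb_allocation_included() reports its new allocation as
	 * strictly contained within its old one.)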
+ */ + for_each_intel_crtc(dev, crtc) { + if (!crtc->active) + continue; - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - break; + pipe = crtc->pipe; - case BETWEEN: - /* Upclock if more than 90% busy over 13ms */ - I915_WRITE(GEN6_RP_UP_EI, 10250); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225); - - /* Downclock if less than 75% busy over 32ms */ - I915_WRITE(GEN6_RP_DOWN_EI, 25000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750); - - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - break; - - case HIGH_POWER: - /* Upclock if more than 85% busy over 10ms */ - I915_WRITE(GEN6_RP_UP_EI, 8000); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800); + if (!skl_ddb_allocation_included(cur_ddb, new_ddb, pipe)) + continue; - /* Downclock if less than 60% busy over 32ms */ - I915_WRITE(GEN6_RP_DOWN_EI, 25000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000); + skl_wm_flush_pipe(dev_priv, pipe, 1); + intel_wait_for_vblank(dev, pipe); - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); - break; + reallocated[pipe] = true; } - dev_priv->rps.power = new_power; - dev_priv->rps.last_adj = 0; -} -static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) -{ - u32 mask = 0; + /* + * Second pass: flush the pipes that are having their allocation + * reduced, but overlapping with a previous allocation. + * + * Here as well we need to wait for the vblank to make sure the freed + * space is not used anymore. + */ + for_each_intel_crtc(dev, crtc) { + if (!crtc->active) + continue; - if (val > dev_priv->rps.min_freq_softlimit) - mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; - if (val < dev_priv->rps.max_freq_softlimit) - mask |= GEN6_PM_RP_UP_THRESHOLD; + pipe = crtc->pipe; - mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); - mask &= dev_priv->pm_rps_events; + if (reallocated[pipe]) + continue; - /* IVB and SNB hard hangs on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. + if (skl_ddb_entry_size(&new_ddb->pipe[pipe]) < + skl_ddb_entry_size(&cur_ddb->pipe[pipe])) { + skl_wm_flush_pipe(dev_priv, pipe, 2); + intel_wait_for_vblank(dev, pipe); + reallocated[pipe] = true; + } + } + + /* + * Third pass: flush the pipes that got more space allocated. + * + * We don't need to actively wait for the update here, next vblank + * will just get more DDB space with the correct WM values. */ - if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) - mask |= GEN6_PM_RP_UP_EI_EXPIRED; + for_each_intel_crtc(dev, crtc) { + if (!crtc->active) + continue; - if (IS_GEN8(dev_priv->dev)) - mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; + pipe = crtc->pipe; - return ~mask; + /* + * At this point, only the pipes more space than before are + * left to re-allocate. + */ + if (reallocated[pipe]) + continue; + + skl_wm_flush_pipe(dev_priv, pipe, 3); + } } -/* gen6_set_rps is called to update the frequency request, but should also be - * called when the range (min_delay and max_delay) is modified so that we can - * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. 
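
/*
 * Editor's note: because reads of GEN6_RP_INTERRUPT_LIMITS can return 0 (the
 * hardware quirk noted above), the limits word is always recomputed from
 * scratch instead of read-modify-written. A sketch of the composition used
 * by gen6_rps_limits, with the field positions as in the code (max softlimit
 * in bits 31:24, min softlimit in bits 23:16); the function name is ours.
 */
#include <stdint.h>

static uint32_t rps_interrupt_limits(uint8_t max_soft, uint8_t min_soft,
				     uint8_t val)
{
	uint32_t limits = (uint32_t)max_soft << 24;	/* always bound upclocking */

	/* Only bound downclocking once we sit at the floor, so the down
	 * interrupts keep firing everywhere else (avoids the rc6-exit race
	 * the comment describes). */
	if (val <= min_soft)
		limits |= (uint32_t)min_soft << 16;

	return limits;
}
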
*/ -void gen6_set_rps(struct drm_device *dev, u8 val) +static bool skl_update_pipe_wm(struct drm_crtc *crtc, + struct skl_pipe_wm_parameters *params, + struct intel_wm_config *config, + struct skl_ddb_allocation *ddb, /* out */ + struct skl_pipe_wm *pipe_wm /* out */) { - struct drm_i915_private *dev_priv = dev->dev_private; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq_softlimit); - WARN_ON(val < dev_priv->rps.min_freq_softlimit); - - /* min/max delay may still have been modified so be sure to - * write the limits value. - */ - if (val != dev_priv->rps.cur_freq) { - gen6_set_rps_thresholds(dev_priv, val); - - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(val)); - else - I915_WRITE(GEN6_RPNSWREQ, - GEN6_FREQUENCY(val) | - GEN6_OFFSET(0) | - GEN6_AGGRESSIVE_TURBO); - } + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - /* Make sure we continue to get interrupts - * until we hit the minimum or maximum frequencies. - */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val)); - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + skl_compute_wm_pipe_parameters(crtc, params); + skl_allocate_pipe_ddb(crtc, config, params, ddb); + skl_compute_pipe_wm(crtc, ddb, params, pipe_wm); - POSTING_READ(GEN6_RPNSWREQ); + if (!memcmp(&intel_crtc->wm.skl_active, pipe_wm, sizeof(*pipe_wm))) + return false; - dev_priv->rps.cur_freq = val; - trace_intel_gpu_freq_change(val * 50); + intel_crtc->wm.skl_active = *pipe_wm; + return true; } -/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down - * - * * If Gfx is Idle, then - * 1. Mask Turbo interrupts - * 2. Bring up Gfx clock - * 3. Change the freq to Rpn and wait till P-Unit updates freq - * 4. Clear the Force GFX CLK ON bit so that Gfx can down - * 5. Unmask Turbo interrupts -*/ -static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) +static void skl_update_other_pipe_wm(struct drm_device *dev, + struct drm_crtc *crtc, + struct intel_wm_config *config, + struct skl_wm_values *r) { - struct drm_device *dev = dev_priv->dev; - - /* Latest VLV doesn't need to force the gfx clock */ - if (dev->pdev->revision >= 0xd) { - valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); - return; - } + struct intel_crtc *intel_crtc; + struct intel_crtc *this_crtc = to_intel_crtc(crtc); /* - * When we are idle. Drop to min voltage state. + * If the WM update hasn't changed the allocation for this_crtc (the + * crtc we are currently computing the new WM values for), other + * enabled crtcs will keep the same allocation and we don't need to + * recompute anything for them. */ - - if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit) + if (!skl_ddb_allocation_changed(&r->ddb, this_crtc)) return; - /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); - - vlv_force_gfx_clock(dev_priv, true); - - dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit; - - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, - dev_priv->rps.min_freq_softlimit); - - if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) - & GENFREQSTATUS) == 0, 5)) - DRM_ERROR("timed out waiting for Punit\n"); - - vlv_force_gfx_clock(dev_priv, false); + /* + * Otherwise, because of this_crtc being freshly enabled/disabled, the + * other active pipes need new DDB allocation and WM values. 
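
/*
 * Editor's note: the change detection in skl_update_pipe_wm above is a plain
 * memcmp() against the per-crtc cache, which is safe here because both
 * structures are zero-initialized before being filled (no indeterminate
 * padding). Reduced to its core below with a stand-in watermark type.
 */
#include <stdbool.h>
#include <string.h>

struct pipe_wm_state { unsigned int level[8]; };

static bool update_if_changed(struct pipe_wm_state *cached,
			      const struct pipe_wm_state *fresh)
{
	if (!memcmp(cached, fresh, sizeof(*fresh)))
		return false;	/* identical: skip the register writes */

	*cached = *fresh;	/* remember for the next comparison */
	return true;
}
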
+ */ + list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, + base.head) { + struct skl_pipe_wm_parameters params = {}; + struct skl_pipe_wm pipe_wm = {}; + bool wm_changed; - I915_WRITE(GEN6_PMINTRMSK, - gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); -} + if (this_crtc->pipe == intel_crtc->pipe) + continue; -void gen6_rps_idle(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; + if (!intel_crtc->active) + continue; - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { - if (IS_CHERRYVIEW(dev)) - valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); - else if (IS_VALLEYVIEW(dev)) - vlv_set_rps_idle(dev_priv); - else - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); - dev_priv->rps.last_adj = 0; - } - mutex_unlock(&dev_priv->rps.hw_lock); -} + wm_changed = skl_update_pipe_wm(&intel_crtc->base, + ¶ms, config, + &r->ddb, &pipe_wm); -void gen6_rps_boost(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; + /* + * If we end up re-computing the other pipe WM values, it's + * because it was really needed, so we expect the WM values to + * be different. + */ + WARN_ON(!wm_changed); - mutex_lock(&dev_priv->rps.hw_lock); - if (dev_priv->rps.enabled) { - if (IS_VALLEYVIEW(dev)) - valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); - else - gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); - dev_priv->rps.last_adj = 0; + skl_compute_wm_results(dev, ¶ms, &pipe_wm, r, intel_crtc); + r->dirty[intel_crtc->pipe] = true; } - mutex_unlock(&dev_priv->rps.hw_lock); } -void valleyview_set_rps(struct drm_device *dev, u8 val) +static void skl_update_wm(struct drm_crtc *crtc) { + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct skl_pipe_wm_parameters params = {}; + struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_pipe_wm pipe_wm = {}; + struct intel_wm_config config = {}; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq_softlimit); - WARN_ON(val < dev_priv->rps.min_freq_softlimit); + memset(results, 0, sizeof(*results)); - if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), - "Odd GPU freq value\n")) - val &= ~1; + skl_compute_wm_global_parameters(dev, &config); - if (val != dev_priv->rps.cur_freq) { - DRM_DEBUG_DRIVER("GPU freq request from %d MHz (%u) to %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq, - vlv_gpu_freq(dev_priv, val), val); + if (!skl_update_pipe_wm(crtc, ¶ms, &config, + &results->ddb, &pipe_wm)) + return; - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - } + skl_compute_wm_results(dev, ¶ms, &pipe_wm, results, intel_crtc); + results->dirty[intel_crtc->pipe] = true; - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + skl_update_other_pipe_wm(dev, crtc, &config, results); + skl_write_wm_values(dev_priv, results); + skl_flush_wm_values(dev_priv, results); - dev_priv->rps.cur_freq = val; - trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val)); + /* store the new configuration */ + dev_priv->wm.skl_hw = *results; } -static void gen8_disable_rps_interrupts(struct drm_device *dev) +static void +skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, + uint32_t sprite_width, uint32_t sprite_height, + int pixel_size, bool enabled, bool scaled) { - struct drm_i915_private *dev_priv = dev->dev_private; - - 
I915_WRITE(GEN6_PMINTRMSK, ~GEN8_PMINTR_REDIRECT_TO_NON_DISP); - I915_WRITE(GEN8_GT_IER(2), I915_READ(GEN8_GT_IER(2)) & - ~dev_priv->pm_rps_events); - /* Complete PM interrupt masking here doesn't race with the rps work - * item again unmasking PM interrupts because that is using a different - * register (GEN8_GT_IMR(2)) to mask PM interrupts. The only risk is in - * leaving stale bits in GEN8_GT_IIR(2) and GEN8_GT_IMR(2) which - * gen8_enable_rps will clean up. */ + struct intel_plane *intel_plane = to_intel_plane(plane); - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); + intel_plane->wm.enabled = enabled; + intel_plane->wm.scaled = scaled; + intel_plane->wm.horiz_pixels = sprite_width; + intel_plane->wm.vert_pixels = sprite_height; + intel_plane->wm.bytes_per_pixel = pixel_size; - I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); + skl_update_wm(crtc); } -static void gen6_disable_rps_interrupts(struct drm_device *dev) +static void ilk_update_wm(struct drm_crtc *crtc) { + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct ilk_wm_maximums max; + struct ilk_pipe_wm_parameters params = {}; + struct ilk_wm_values results = {}; + enum intel_ddb_partitioning partitioning; + struct intel_pipe_wm pipe_wm = {}; + struct intel_pipe_wm lp_wm_1_2 = {}, lp_wm_5_6 = {}, *best_lp_wm; + struct intel_wm_config config = {}; - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); - I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & - ~dev_priv->pm_rps_events); - /* Complete PM interrupt masking here doesn't race with the rps work - * item again unmasking PM interrupts because that is using a different - * register (PMIMR) to mask PM interrupts. The only risk is in leaving - * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */ - - spin_lock_irq(&dev_priv->irq_lock); - dev_priv->rps.pm_iir = 0; - spin_unlock_irq(&dev_priv->irq_lock); + ilk_compute_wm_parameters(crtc, ¶ms); - I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events); -} + intel_compute_pipe_wm(crtc, ¶ms, &pipe_wm); -static void gen6_disable_rps(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + if (!memcmp(&intel_crtc->wm.active, &pipe_wm, sizeof(pipe_wm))) + return; - I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN6_RPNSWREQ, 1 << 31); + intel_crtc->wm.active = pipe_wm; - if (IS_BROADWELL(dev)) - gen8_disable_rps_interrupts(dev); - else - gen6_disable_rps_interrupts(dev); -} + ilk_compute_wm_config(dev, &config); -static void cherryview_disable_rps(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_1_2, &max); + ilk_wm_merge(dev, &config, &max, &lp_wm_1_2); - I915_WRITE(GEN6_RC_CONTROL, 0); + /* 5/6 split only in single pipe config on IVB+ */ + if (INTEL_INFO(dev)->gen >= 7 && + config.num_pipes_active == 1 && config.sprites_enabled) { + ilk_compute_wm_maximums(dev, 1, &config, INTEL_DDB_PART_5_6, &max); + ilk_wm_merge(dev, &config, &max, &lp_wm_5_6); - gen8_disable_rps_interrupts(dev); -} + best_lp_wm = ilk_find_best_result(dev, &lp_wm_1_2, &lp_wm_5_6); + } else { + best_lp_wm = &lp_wm_1_2; + } -static void valleyview_disable_rps(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + partitioning = (best_lp_wm == &lp_wm_1_2) ? 
+ INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6; - /* we're doing forcewake before Disabling RC6, - * This what the BIOS expects when going into suspend */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + ilk_compute_wm_results(dev, best_lp_wm, partitioning, &results); - I915_WRITE(GEN6_RC_CONTROL, 0); + ilk_write_wm_values(dev_priv, &results); +} - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); +static void +ilk_update_sprite_wm(struct drm_plane *plane, + struct drm_crtc *crtc, + uint32_t sprite_width, uint32_t sprite_height, + int pixel_size, bool enabled, bool scaled) +{ + struct drm_device *dev = plane->dev; + struct intel_plane *intel_plane = to_intel_plane(plane); + + intel_plane->wm.enabled = enabled; + intel_plane->wm.scaled = scaled; + intel_plane->wm.horiz_pixels = sprite_width; + intel_plane->wm.vert_pixels = sprite_width; + intel_plane->wm.bytes_per_pixel = pixel_size; + + /* + * IVB workaround: must disable low power watermarks for at least + * one frame before enabling scaling. LP watermarks can be re-enabled + * when scaling is disabled. + * + * WaCxSRDisabledForSpriteScaling:ivb + */ + if (IS_IVYBRIDGE(dev) && scaled && ilk_disable_lp_wm(dev)) + intel_wait_for_vblank(dev, intel_plane->pipe); - gen6_disable_rps_interrupts(dev); + ilk_update_wm(crtc); } -static void intel_print_rc6_info(struct drm_device *dev, u32 mode) +static void skl_pipe_wm_active_state(uint32_t val, + struct skl_pipe_wm *active, + bool is_transwm, + bool is_cursor, + int i, + int level) { - if (IS_VALLEYVIEW(dev)) { - if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) - mode = GEN6_RC_CTL_RC6_ENABLE; - else - mode = 0; + bool is_enabled = (val & PLANE_WM_EN) != 0; + + if (!is_transwm) { + if (!is_cursor) { + active->wm[level].plane_en[i] = is_enabled; + active->wm[level].plane_res_b[i] = + val & PLANE_WM_BLOCKS_MASK; + active->wm[level].plane_res_l[i] = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } else { + active->wm[level].cursor_en = is_enabled; + active->wm[level].cursor_res_b = + val & PLANE_WM_BLOCKS_MASK; + active->wm[level].cursor_res_l = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } + } else { + if (!is_cursor) { + active->trans_wm.plane_en[i] = is_enabled; + active->trans_wm.plane_res_b[i] = + val & PLANE_WM_BLOCKS_MASK; + active->trans_wm.plane_res_l[i] = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } else { + active->trans_wm.cursor_en = is_enabled; + active->trans_wm.cursor_res_b = + val & PLANE_WM_BLOCKS_MASK; + active->trans_wm.cursor_res_l = + (val >> PLANE_WM_LINES_SHIFT) & + PLANE_WM_LINES_MASK; + } } - DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n", - (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", - (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", - (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? 
"on" : "off"); } -static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) +static void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc) { - /* No RC6 before Ironlake */ - if (INTEL_INFO(dev)->gen < 5) - return 0; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct skl_wm_values *hw = &dev_priv->wm.skl_hw; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct skl_pipe_wm *active = &intel_crtc->wm.skl_active; + enum pipe pipe = intel_crtc->pipe; + int level, i, max_level; + uint32_t temp; - /* RC6 is only on Ironlake mobile not on desktop */ - if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev)) - return 0; + max_level = ilk_wm_max_level(dev); - /* Respect the kernel parameter if it is set */ - if (enable_rc6 >= 0) { - int mask; + hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); - if (INTEL_INFO(dev)->gen == 6 || IS_IVYBRIDGE(dev)) - mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | - INTEL_RC6pp_ENABLE; - else - mask = INTEL_RC6_ENABLE; + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(intel_crtc); i++) + hw->plane[pipe][i][level] = + I915_READ(PLANE_WM(pipe, i, level)); + hw->cursor[pipe][level] = I915_READ(CUR_WM(pipe, level)); + } - if ((enable_rc6 & mask) != enable_rc6) - DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", - enable_rc6 & mask, enable_rc6, mask); + for (i = 0; i < intel_num_planes(intel_crtc); i++) + hw->plane_trans[pipe][i] = I915_READ(PLANE_WM_TRANS(pipe, i)); + hw->cursor_trans[pipe] = I915_READ(CUR_WM_TRANS(pipe)); - return enable_rc6 & mask; - } + if (!intel_crtc_active(crtc)) + return; - /* Disable RC6 on Ironlake */ - if (INTEL_INFO(dev)->gen == 5) - return 0; + hw->dirty[pipe] = true; - if (IS_IVYBRIDGE(dev)) - return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); + active->linetime = hw->wm_linetime[pipe]; - return INTEL_RC6_ENABLE; -} + for (level = 0; level <= max_level; level++) { + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = hw->plane[pipe][i][level]; + skl_pipe_wm_active_state(temp, active, false, + false, i, level); + } + temp = hw->cursor[pipe][level]; + skl_pipe_wm_active_state(temp, active, false, true, i, level); + } -int intel_enable_rc6(const struct drm_device *dev) -{ - return i915.enable_rc6; + for (i = 0; i < intel_num_planes(intel_crtc); i++) { + temp = hw->plane_trans[pipe][i]; + skl_pipe_wm_active_state(temp, active, true, false, i, 0); + } + + temp = hw->cursor_trans[pipe]; + skl_pipe_wm_active_state(temp, active, true, true, i, 0); } -static void gen8_enable_rps_interrupts(struct drm_device *dev) +void skl_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; + struct drm_crtc *crtc; - spin_lock_irq(&dev_priv->irq_lock); - WARN_ON(dev_priv->rps.pm_iir); - gen8_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); - I915_WRITE(GEN8_GT_IIR(2), dev_priv->pm_rps_events); - spin_unlock_irq(&dev_priv->irq_lock); + skl_ddb_get_hw_state(dev_priv, ddb); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) + skl_pipe_wm_get_hw_state(crtc); } -static void gen6_enable_rps_interrupts(struct drm_device *dev) +static void ilk_pipe_wm_get_hw_state(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct ilk_wm_values *hw = &dev_priv->wm.hw; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_pipe_wm *active = 
&intel_crtc->wm.active; + enum pipe pipe = intel_crtc->pipe; + static const unsigned int wm0_pipe_reg[] = { + [PIPE_A] = WM0_PIPEA_ILK, + [PIPE_B] = WM0_PIPEB_ILK, + [PIPE_C] = WM0_PIPEC_IVB, + }; - spin_lock_irq(&dev_priv->irq_lock); - WARN_ON(dev_priv->rps.pm_iir); - gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); - I915_WRITE(GEN6_PMIIR, dev_priv->pm_rps_events); - spin_unlock_irq(&dev_priv->irq_lock); -} + hw->wm_pipe[pipe] = I915_READ(wm0_pipe_reg[pipe]); + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + hw->wm_linetime[pipe] = I915_READ(PIPE_WM_LINETIME(pipe)); -static void parse_rp_state_cap(struct drm_i915_private *dev_priv, u32 rp_state_cap) -{ - /* All of these values are in units of 50MHz */ - dev_priv->rps.cur_freq = 0; - /* static values from HW: RP0 < RPe < RP1 < RPn (min_freq) */ - dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; - dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; - dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; - /* XXX: only BYT has a special efficient freq */ - dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; - /* hw_max = RP0 until we check for overclocking */ - dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; + active->pipe_enabled = intel_crtc_active(crtc); - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + if (active->pipe_enabled) { + u32 tmp = hw->wm_pipe[pipe]; - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + /* + * For active pipes LP0 watermark is marked as + * enabled, and LP1+ watermaks as disabled since + * we can't really reverse compute them in case + * multiple pipes are active. + */ + active->wm[0].enable = true; + active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT; + active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT; + active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK; + active->linetime = hw->wm_linetime[pipe]; + } else { + int level, max_level = ilk_wm_max_level(dev); + + /* + * For inactive pipes, all watermark levels + * should be marked as enabled but zeroed, + * which is what we'd compute them to. + */ + for (level = 0; level <= max_level; level++) + active->wm[level].enable = true; + } } -static void gen8_enable_rps(struct drm_device *dev) +void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - uint32_t rc6_mask = 0, rp_state_cap; - int unused; + struct ilk_wm_values *hw = &dev_priv->wm.hw; + struct drm_crtc *crtc; - /* 1a: Software RC state - RC0 */ - I915_WRITE(GEN6_RC_STATE, 0); + for_each_crtc(dev, crtc) + ilk_pipe_wm_get_hw_state(crtc); - /* 1c & 1d: Get forcewake during program sequence. Although the driver - * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + hw->wm_lp[0] = I915_READ(WM1_LP_ILK); + hw->wm_lp[1] = I915_READ(WM2_LP_ILK); + hw->wm_lp[2] = I915_READ(WM3_LP_ILK); - /* 2a: Disable RC states. */ - I915_WRITE(GEN6_RC_CONTROL, 0); + hw->wm_lp_spr[0] = I915_READ(WM1S_LP_ILK); + if (INTEL_INFO(dev)->gen >= 7) { + hw->wm_lp_spr[1] = I915_READ(WM2S_LP_IVB); + hw->wm_lp_spr[2] = I915_READ(WM3S_LP_IVB); + } - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - parse_rp_state_cap(dev_priv, rp_state_cap); + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + hw->partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ? 
+ INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; + else if (IS_IVYBRIDGE(dev)) + hw->partitioning = (I915_READ(DISP_ARB_CTL2) & DISP_DATA_PARTITION_5_6) ? + INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2; - /* 2b: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - for_each_ring(ring, dev_priv, unused) - I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); - I915_WRITE(GEN6_RC_SLEEP, 0); - if (IS_BROADWELL(dev)) - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + hw->enable_fbc_wm = + !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); +} - /* 3: Enable RC6 */ - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) - rc6_mask = GEN6_RC_CTL_RC6_ENABLE; - intel_print_rc6_info(dev, rc6_mask); - if (IS_BROADWELL(dev)) - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - else - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); +/** + * intel_update_watermarks - update FIFO watermark values based on current modes + * + * Calculate watermark values for the various WM regs based on current mode + * and plane configuration. + * + * There are several cases to deal with here: + * - normal (i.e. non-self-refresh) + * - self-refresh (SR) mode + * - lines are large relative to FIFO size (buffer can hold up to 2) + * - lines are small relative to FIFO size (buffer can hold more than 2 + * lines), so need to account for TLB latency + * + * The normal calculation is: + * watermark = dotclock * bytes per pixel * latency + * where latency is platform & configuration dependent (we assume pessimal + * values here). + * + * The SR calculation is: + * watermark = (trunc(latency/line time)+1) * surface width * + * bytes per pixel + * where + * line time = htotal / dotclock + * surface width = hdisplay for normal plane and 64 for cursor + * and latency is assumed to be high, as above. + * + * The final value programmed to the register should always be rounded up, + * and include an extra 2 entries to account for clock crossings. + * + * We don't use the sprite, so we can ignore that. And on Crestline we have + * to set the non-SR watermarks to 8. + */ +void intel_update_watermarks(struct drm_crtc *crtc) +{ + struct drm_i915_private *dev_priv = crtc->dev->dev_private; - /* 4 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RPNSWREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); - I915_WRITE(GEN6_RC_VIDEO_FREQ, - HSW_FREQUENCY(dev_priv->rps.rp1_freq)); - /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ + if (dev_priv->display.update_wm) + dev_priv->display.update_wm(crtc); +} - /* Docs recommend 900MHz, and 300 MHz respectively */ - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - dev_priv->rps.max_freq_softlimit << 24 | - dev_priv->rps.min_freq_softlimit << 16); +void intel_update_sprite_watermarks(struct drm_plane *plane, + struct drm_crtc *crtc, + uint32_t sprite_width, + uint32_t sprite_height, + int pixel_size, + bool enabled, bool scaled) +{ + struct drm_i915_private *dev_priv = plane->dev->dev_private; - I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ - I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? 
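
/*
 * Editor's note: the "XXX: random?" numbers above do line up once converted
 * from the 1.28 us RP counter units (our arithmetic, not from the patch):
 */
#include <stdio.h>

int main(void)
{
	double us = 1.28;	/* one RP evaluation-interval tick */
	double up_ei    = 66000 * us / 1000.0;			/* 84.48 ms window */
	double up_thr   = (7600000 / 128) * us / 1000.0;	/* 76 ms busy */
	double down_ei  = 350000 * us / 1000.0;			/* 448 ms window */
	double down_thr = (31300000 / 128) * us / 1000.0;	/* 313 ms busy */

	printf("up:   %.2f / %.2f ms = %.0f%% busy\n", up_thr, up_ei,
	       100.0 * up_thr / up_ei);			/* -> 90% */
	printf("down: %.0f / %.0f ms = %.0f%% busy\n", down_thr, down_ei,
	       100.0 * down_thr / down_ei);		/* -> 70% */
	return 0;
}
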
*/ - I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + if (dev_priv->display.update_sprite_wm) + dev_priv->display.update_sprite_wm(plane, crtc, + sprite_width, sprite_height, + pixel_size, enabled, scaled); +} - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); +static struct drm_i915_gem_object * +intel_alloc_context_page(struct drm_device *dev) +{ + struct drm_i915_gem_object *ctx; + int ret; - /* 5: Enable RPS */ - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_AVG); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - /* 6: Ring frequency + overclocking (our driver does this later */ + ctx = i915_gem_alloc_object(dev, 4096); + if (!ctx) { + DRM_DEBUG("failed to alloc power context, RC6 disabled\n"); + return NULL; + } - gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8); + ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0); + if (ret) { + DRM_ERROR("failed to pin power context: %d\n", ret); + goto err_unref; + } - gen8_enable_rps_interrupts(dev); + ret = i915_gem_object_set_to_gtt_domain(ctx, 1); + if (ret) { + DRM_ERROR("failed to set-domain on power context: %d\n", ret); + goto err_unpin; + } - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + return ctx; + +err_unpin: + i915_gem_object_ggtt_unpin(ctx); +err_unref: + drm_gem_object_unreference(&ctx->base); + return NULL; } -static void gen6_enable_rps(struct drm_device *dev) +/** + * Lock protecting IPS related data structures + */ +DEFINE_SPINLOCK(mchdev_lock); + +/* Global for IPS driver to get at the current i915 device. Protected by + * mchdev_lock. */ +static struct drm_i915_private *i915_mch_dev; + +bool ironlake_set_drps(struct drm_device *dev, u8 val) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - u32 rp_state_cap; - u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; - u32 gtfifodbg; - int rc6_mode; - int i, ret; - - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + u16 rgvswctl; - /* Here begins a magic sequence of register writes to enable - * auto-downclocking. - * - * Perhaps there might be some value in exposing these to - * userspace... 
- */ - I915_WRITE(GEN6_RC_STATE, 0); + assert_spin_locked(&mchdev_lock); - /* Clear the DBG now so we don't confuse earlier errors */ - if ((gtfifodbg = I915_READ(GTFIFODBG))) { - DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); + rgvswctl = I915_READ16(MEMSWCTL); + if (rgvswctl & MEMCTL_CMD_STS) { + DRM_DEBUG("gpu busy, RCS change rejected\n"); + return false; /* still busy with another command */ } - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) | + (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM; + I915_WRITE16(MEMSWCTL, rgvswctl); + POSTING_READ16(MEMSWCTL); - rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + rgvswctl |= MEMCTL_CMD_STS; + I915_WRITE16(MEMSWCTL, rgvswctl); - parse_rp_state_cap(dev_priv, rp_state_cap); + return true; +} - /* disable the counters and set deterministic thresholds */ - I915_WRITE(GEN6_RC_CONTROL, 0); +static void ironlake_enable_drps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 rgvmodectl = I915_READ(MEMMODECTL); + u8 fmax, fmin, fstart, vstart; - I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); - I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); - I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); - I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); + spin_lock_irq(&mchdev_lock); - for_each_ring(ring, dev_priv, i) - I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + /* Enable temp reporting */ + I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); + I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); - I915_WRITE(GEN6_RC_SLEEP, 0); - I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); - if (IS_IVYBRIDGE(dev)) - I915_WRITE(GEN6_RC6_THRESHOLD, 125000); - else - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); - I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + /* 100ms RC evaluation intervals */ + I915_WRITE(RCUPEI, 100000); + I915_WRITE(RCDNEI, 100000); - /* Check if we are enabling RC6 */ - rc6_mode = intel_enable_rc6(dev_priv->dev); - if (rc6_mode & INTEL_RC6_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; + /* Set max/min thresholds to 90ms and 80ms respectively */ + I915_WRITE(RCBMAXAVG, 90000); + I915_WRITE(RCBMINAVG, 80000); - /* We don't use those on Haswell */ - if (!IS_HASWELL(dev)) { - if (rc6_mode & INTEL_RC6p_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + I915_WRITE(MEMIHYST, 1); - if (rc6_mode & INTEL_RC6pp_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; - } + /* Set up min, max, and cur for interrupt handling */ + fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT; + fmin = (rgvmodectl & MEMMODE_FMIN_MASK); + fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >> + MEMMODE_FSTART_SHIFT; - intel_print_rc6_info(dev, rc6_mask); + vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> + PXVFREQ_PX_SHIFT; - I915_WRITE(GEN6_RC_CONTROL, - rc6_mask | - GEN6_RC_CTL_EI_MODE(1) | - GEN6_RC_CTL_HW_ENABLE); + dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ + dev_priv->ips.fstart = fstart; - /* Power down if completely idle for over 50ms */ - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + dev_priv->ips.max_delay = fstart; + dev_priv->ips.min_delay = fmin; + dev_priv->ips.cur_delay = fstart; - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); - if (ret) - DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); + DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", + fmax, 
fmin, fstart); - ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); - if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ - DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", - (dev_priv->rps.max_freq_softlimit & 0xff) * 50, - (pcu_mbox & 0xff) * 50); - dev_priv->rps.max_freq = pcu_mbox & 0xff; - } + I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN); - dev_priv->rps.power = HIGH_POWER; /* force a reset */ - gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + /* + * Interrupts will be enabled in ironlake_irq_postinstall + */ - gen6_enable_rps_interrupts(dev); + I915_WRITE(VIDSTART, vstart); + POSTING_READ(VIDSTART); - rc6vids = 0; - ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); - if (IS_GEN6(dev) && ret) { - DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); - } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { - DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", - GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); - rc6vids &= 0xffff00; - rc6vids |= GEN6_ENCODE_RC6_VID(450); - ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); - if (ret) - DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); - } + rgvmodectl |= MEMMODE_SWMODE_EN; + I915_WRITE(MEMMODECTL, rgvmodectl); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) + DRM_ERROR("stuck trying to change perf mode\n"); + mdelay(1); + + ironlake_set_drps(dev, fstart); + + dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + + I915_READ(0x112e0); + dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); + dev_priv->ips.last_count2 = I915_READ(0x112f4); + dev_priv->ips.last_time2 = ktime_get_raw_ns(); + + spin_unlock_irq(&mchdev_lock); } -static void __gen6_update_ring_freq(struct drm_device *dev) +static void ironlake_disable_drps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int min_freq = 15; - unsigned int gpu_freq; - unsigned int max_ia_freq, min_ring_freq; - int scaling_factor = 180; - struct cpufreq_policy *policy; + u16 rgvswctl; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + spin_lock_irq(&mchdev_lock); - policy = cpufreq_cpu_get(0); - if (policy) { - max_ia_freq = policy->cpuinfo.max_freq; - cpufreq_cpu_put(policy); - } else { - /* - * Default to measured freq if none found, PCU will ensure we - * don't go over - */ - max_ia_freq = tsc_khz; - } + rgvswctl = I915_READ16(MEMSWCTL); - /* Convert from kHz to MHz */ - max_ia_freq /= 1000; + /* Ack interrupts, disable EFC interrupt */ + I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); + I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG); + I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT); + I915_WRITE(DEIIR, DE_PCU_EVENT); + I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); - min_ring_freq = I915_READ(DCLK) & 0xf; - /* convert DDR frequency from units of 266.6MHz to bandwidth */ - min_ring_freq = mult_frac(min_ring_freq, 8, 3); + /* Go back to the starting frequency */ + ironlake_set_drps(dev, dev_priv->ips.fstart); + mdelay(1); + rgvswctl |= MEMCTL_CMD_STS; + I915_WRITE(MEMSWCTL, rgvswctl); + mdelay(1); - /* - * For each potential GPU frequency, load a ring frequency we'd like - * to use for memory access. We do this by specifying the IA frequency - * the PCU should use as a reference to determine the ring frequency. 
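
/*
 * Editor's note: a standalone restatement of the per-frequency pairing logic
 * in the loop above (the constants 15 for min_freq and 180 for
 * scaling_factor come from the removed code; GT frequencies are in 50 MHz
 * units, IA frequencies in MHz before the final divide). On pre-Haswell
 * parts the ring has no separate clock domain, so the CPU (IA) is upclocked
 * instead and the ring ratio field is left at zero.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

static void pick_ring_and_ia(int gpu_freq, int max_soft, int min_ring_freq,
			     unsigned int max_ia_freq_mhz, int gen,
			     int is_haswell,
			     unsigned int *ia_freq, unsigned int *ring_freq)
{
	int diff = max_soft - gpu_freq;

	*ia_freq = 0;		/* 0 = keep cpufreq's own choice */
	*ring_freq = 0;

	if (gen >= 8) {
		*ring_freq = MAX(min_ring_freq, gpu_freq);
	} else if (is_haswell) {
		*ring_freq = MAX(min_ring_freq, gpu_freq * 5 / 4);
	} else if (gpu_freq < 15) {
		*ia_freq = (800 + 50) / 100;	/* below 750 MHz: lowest request */
	} else {
		*ia_freq = max_ia_freq_mhz - (diff * 180) / 2;
		*ia_freq = (*ia_freq + 50) / 100;	/* DIV_ROUND_CLOSEST(x, 100) */
	}
}
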
- */ - for (gpu_freq = dev_priv->rps.max_freq_softlimit; gpu_freq >= dev_priv->rps.min_freq_softlimit; - gpu_freq--) { - int diff = dev_priv->rps.max_freq_softlimit - gpu_freq; - unsigned int ia_freq = 0, ring_freq = 0; + spin_unlock_irq(&mchdev_lock); +} - if (INTEL_INFO(dev)->gen >= 8) { - /* max(2 * GT, DDR). NB: GT is 50MHz units */ - ring_freq = max(min_ring_freq, gpu_freq); - } else if (IS_HASWELL(dev)) { - ring_freq = mult_frac(gpu_freq, 5, 4); - ring_freq = max(min_ring_freq, ring_freq); - /* leave ia_freq as the default, chosen by cpufreq */ - } else { - /* On older processors, there is no separate ring - * clock domain, so in order to boost the bandwidth - * of the ring, we need to upclock the CPU (ia_freq). - * - * For GPU frequencies less than 750MHz, - * just use the lowest ring freq. - */ - if (gpu_freq < min_freq) - ia_freq = 800; - else - ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); - ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); - } +/* There's a funny hw issue where the hw returns all 0 when reading from + * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value + * ourselves, instead of doing a rmw cycle (which might result in us clearing + * all limits and the gpu stuck at whatever frequency it is at atm). + */ +static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 val) +{ + u32 limits; - sandybridge_pcode_write(dev_priv, - GEN6_PCODE_WRITE_MIN_FREQ_TABLE, - ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | - ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | - gpu_freq); - } + /* Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. */ + limits = dev_priv->rps.max_freq_softlimit << 24; + if (val <= dev_priv->rps.min_freq_softlimit) + limits |= dev_priv->rps.min_freq_softlimit << 16; + + return limits; } -void gen6_update_ring_freq(struct drm_device *dev) +static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) { - struct drm_i915_private *dev_priv = dev->dev_private; + int new_power; - if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev)) + new_power = dev_priv->rps.power; + switch (dev_priv->rps.power) { + case LOW_POWER: + if (val > dev_priv->rps.efficient_freq + 1 && val > dev_priv->rps.cur_freq) + new_power = BETWEEN; + break; + + case BETWEEN: + if (val <= dev_priv->rps.efficient_freq && val < dev_priv->rps.cur_freq) + new_power = LOW_POWER; + else if (val >= dev_priv->rps.rp0_freq && val > dev_priv->rps.cur_freq) + new_power = HIGH_POWER; + break; + + case HIGH_POWER: + if (val < (dev_priv->rps.rp1_freq + dev_priv->rps.rp0_freq) >> 1 && val < dev_priv->rps.cur_freq) + new_power = BETWEEN; + break; + } + /* Max/min bins are special */ + if (val == dev_priv->rps.min_freq_softlimit) + new_power = LOW_POWER; + if (val == dev_priv->rps.max_freq_softlimit) + new_power = HIGH_POWER; + if (new_power == dev_priv->rps.power) return; - mutex_lock(&dev_priv->rps.hw_lock); - __gen6_update_ring_freq(dev); - mutex_unlock(&dev_priv->rps.hw_lock); -} + /* Note the units here are not exactly 1us, but 1280ns. 
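
/*
 * Editor's note: checking the LOW_POWER numbers above in those 1280 ns units
 * (our arithmetic): the "95% busy over 16ms" comment is the rounded form of
 * a 12500-tick window with an 11800-tick threshold, i.e. 94.4%.
 */
#include <stdio.h>

int main(void)
{
	double tick_ns = 1280.0;

	printf("up:   %.1f ms window, %.1f%% busy threshold\n",
	       12500 * tick_ns / 1e6, 100.0 * 11800 / 12500);	/* 16.0 ms, 94.4% */
	printf("down: %.1f ms window, %.1f%% busy threshold\n",
	       25000 * tick_ns / 1e6, 100.0 * 21250 / 25000);	/* 32.0 ms, 85.0% */
	return 0;
}
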
*/ + switch (new_power) { + case LOW_POWER: + /* Upclock if more than 95% busy over 16ms */ + I915_WRITE(GEN6_RP_UP_EI, 12500); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 11800); -static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp0; + /* Downclock if less than 85% busy over 32ms */ + I915_WRITE(GEN6_RP_DOWN_EI, 25000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 21250); - val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); - rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + break; - return rp0; -} + case BETWEEN: + /* Upclock if more than 90% busy over 13ms */ + I915_WRITE(GEN6_RP_UP_EI, 10250); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 9225); -static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; + /* Downclock if less than 75% busy over 32ms */ + I915_WRITE(GEN6_RP_DOWN_EI, 25000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 18750); - val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); - rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + break; - return rpe; -} + case HIGH_POWER: + /* Upclock if more than 85% busy over 10ms */ + I915_WRITE(GEN6_RP_UP_EI, 8000); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 6800); -static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; + /* Downclock if less than 60% busy over 32ms */ + I915_WRITE(GEN6_RP_DOWN_EI, 25000); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 15000); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - rp1 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + break; + } - return rp1; + dev_priv->rps.power = new_power; + dev_priv->rps.last_adj = 0; } -static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) { - u32 val, rpn; + u32 mask = 0; - val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); - rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK; - return rpn; -} + if (val > dev_priv->rps.min_freq_softlimit) + mask |= GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; + if (val < dev_priv->rps.max_freq_softlimit) + mask |= GEN6_PM_RP_UP_THRESHOLD; -static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rp1; + mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); + mask &= dev_priv->pm_rps_events; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); + /* IVB and SNB hard hangs on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. 
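
/*
 * Editor's note: gen6_rps_pm_mask above returns the complement of what it
 * builds because GEN6_PMINTRMSK masks the interrupts whose bits are *set*
 * (which is also why idling writes 0xffffffff to it). A reduced model; the
 * event bit values are illustrative, not the hardware's.
 */
#include <stdint.h>

#define EV_DOWN_THRESHOLD (1u << 0)
#define EV_UP_THRESHOLD   (1u << 1)

static uint32_t pm_intr_mask(uint8_t val, uint8_t min_soft, uint8_t max_soft)
{
	uint32_t wanted = 0;

	if (val > min_soft)
		wanted |= EV_DOWN_THRESHOLD;	/* still room to downclock */
	if (val < max_soft)
		wanted |= EV_UP_THRESHOLD;	/* still room to upclock */

	return ~wanted;		/* set bit == masked off, so invert */
}
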
+ */ + if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) + mask |= GEN6_PM_RP_UP_EI_EXPIRED; - rp1 = (val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK) >> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; + if (IS_GEN8(dev_priv->dev)) + mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; - return rp1; + return ~mask; } -static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) +/* gen6_set_rps is called to update the frequency request, but should also be + * called when the range (min_delay and max_delay) is modified so that we can + * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ +void gen6_set_rps(struct drm_device *dev, u8 val) { - u32 val, rp0; + struct drm_i915_private *dev_priv = dev->dev_private; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(val > dev_priv->rps.max_freq_softlimit); + WARN_ON(val < dev_priv->rps.min_freq_softlimit); - rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; - /* Clamp to max */ - rp0 = min_t(u32, rp0, 0xea); + /* min/max delay may still have been modified so be sure to + * write the limits value. + */ + if (val != dev_priv->rps.cur_freq) { + gen6_set_rps_thresholds(dev_priv, val); - return rp0; -} + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + I915_WRITE(GEN6_RPNSWREQ, + HSW_FREQUENCY(val)); + else + I915_WRITE(GEN6_RPNSWREQ, + GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + } -static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) -{ - u32 val, rpe; + /* Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, gen6_rps_limits(dev_priv, val)); + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); - rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; - val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); - rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; + POSTING_READ(GEN6_RPNSWREQ); - return rpe; + dev_priv->rps.cur_freq = val; + trace_intel_gpu_freq_change(val * 50); } -static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) +/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down + * + * * If Gfx is Idle, then + * 1. Mask Turbo interrupts + * 2. Bring up Gfx clock + * 3. Change the freq to Rpn and wait till P-Unit updates freq + * 4. Clear the Force GFX CLK ON bit so that Gfx can down + * 5. Unmask Turbo interrupts +*/ +static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; -} + struct drm_device *dev = dev_priv->dev; -/* Check that the pctx buffer wasn't move under us. */ -static void valleyview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; + /* Latest VLV doesn't need to force the gfx clock */ + if (dev->pdev->revision >= 0xd) { + valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + return; + } - WARN_ON(pctx_addr != dev_priv->mm.stolen_base + - dev_priv->vlv_pctx->stolen->start); -} + /* + * When we are idle. Drop to min voltage state. + */ + if (dev_priv->rps.cur_freq <= dev_priv->rps.min_freq_softlimit) + return; -/* Check that the pcbr address is not empty. 
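
/*
 * Editor's note: both pctx sanity checks above treat the low 12 bits of
 * VLV_PCBR as control bits and the rest as a 4 KiB-aligned physical address;
 * a reduced model (the shift of 12 is our assumption for
 * VLV_PCBR_ADDR_SHIFT).
 */
#include <stdbool.h>
#include <stdint.h>

static inline uint32_t pcbr_address(uint32_t pcbr)
{
	return pcbr & ~4095u;		/* strip control bits [11:0] */
}

static inline bool pcbr_programmed(uint32_t pcbr)
{
	return (pcbr >> 12) != 0;	/* zero address: BIOS left it empty */
}
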
*/ -static void cherryview_check_pctx(struct drm_i915_private *dev_priv) -{ - unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; + /* Mask turbo interrupt so that they will not come in between */ + I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); - WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); -} + vlv_force_gfx_clock(dev_priv, true); -static void cherryview_setup_pctx(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - unsigned long pctx_paddr, paddr; - struct i915_gtt *gtt = &dev_priv->gtt; - u32 pcbr; - int pctx_size = 32*1024; + dev_priv->rps.cur_freq = dev_priv->rps.min_freq_softlimit; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, + dev_priv->rps.min_freq_softlimit); - pcbr = I915_READ(VLV_PCBR); - if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { - paddr = (dev_priv->mm.stolen_base + - (gtt->stolen_size - pctx_size)); + if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) + & GENFREQSTATUS) == 0, 100)) + DRM_ERROR("timed out waiting for Punit\n"); - pctx_paddr = (paddr & (~4095)); - I915_WRITE(VLV_PCBR, pctx_paddr); - } + vlv_force_gfx_clock(dev_priv, false); + + I915_WRITE(GEN6_PMINTRMSK, + gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); } -static void valleyview_setup_pctx(struct drm_device *dev) +void gen6_rps_idle(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_gem_object *pctx; - unsigned long pctx_paddr; - u32 pcbr; - int pctx_size = 24*1024; - - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - - pcbr = I915_READ(VLV_PCBR); - if (pcbr) { - /* BIOS set it up already, grab the pre-alloc'd space */ - int pcbr_offset; - - pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; - pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, - pcbr_offset, - I915_GTT_OFFSET_NONE, - pctx_size); - goto out; - } + struct drm_device *dev = dev_priv->dev; - /* - * From the Gunit register HAS: - * The Gfx driver is expected to program this register and ensure - * proper allocation within Gfx stolen memory. For example, this - * register should be programmed such than the PCBR range does not - * overlap with other ranges, such as the frame buffer, protected - * memory, or any other relevant ranges. 
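
/*
 * Editor's note: per the Gunit HAS excerpt above, the driver must place the
 * power context itself when the BIOS has not; cherryview_setup_pctx puts the
 * 32 KiB carve-out at the very top of stolen memory, aligned down to 4 KiB.
 * A standalone restatement of that address math (function name ours).
 */
#include <stdint.h>

static uint32_t pctx_paddr_at_top_of_stolen(uint32_t stolen_base,
					    uint32_t stolen_size,
					    uint32_t pctx_size)
{
	uint32_t paddr = stolen_base + (stolen_size - pctx_size);

	return paddr & ~4095u;	/* PCBR takes a 4 KiB-aligned address */
}
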
- */ - pctx = i915_gem_object_create_stolen(dev, pctx_size); - if (!pctx) { - DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); - return; + mutex_lock(&dev_priv->rps.hw_lock); + if (dev_priv->rps.enabled) { + if (IS_CHERRYVIEW(dev)) + valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + else if (IS_VALLEYVIEW(dev)) + vlv_set_rps_idle(dev_priv); + else + gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); + dev_priv->rps.last_adj = 0; } - - pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; - I915_WRITE(VLV_PCBR, pctx_paddr); - -out: - dev_priv->vlv_pctx = pctx; + mutex_unlock(&dev_priv->rps.hw_lock); } -static void valleyview_cleanup_pctx(struct drm_device *dev) +void gen6_rps_boost(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - if (WARN_ON(!dev_priv->vlv_pctx)) - return; + struct drm_device *dev = dev_priv->dev; - drm_gem_object_unreference(&dev_priv->vlv_pctx->base); - dev_priv->vlv_pctx = NULL; + mutex_lock(&dev_priv->rps.hw_lock); + if (dev_priv->rps.enabled) { + if (IS_VALLEYVIEW(dev)) + valleyview_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); + else + gen6_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit); + dev_priv->rps.last_adj = 0; + } + mutex_unlock(&dev_priv->rps.hw_lock); } -static void valleyview_init_gt_powersave(struct drm_device *dev) +void valleyview_set_rps(struct drm_device *dev, u8 val) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - valleyview_setup_pctx(dev); + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + WARN_ON(val > dev_priv->rps.max_freq_softlimit); + WARN_ON(val < dev_priv->rps.min_freq_softlimit); - mutex_lock(&dev_priv->rps.hw_lock); + if (WARN_ONCE(IS_CHERRYVIEW(dev) && (val & 1), + "Odd GPU freq value\n")) + val &= ~1; - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - switch ((val >> 6) & 3) { - case 0: - case 1: - dev_priv->mem_freq = 800; - break; - case 2: - dev_priv->mem_freq = 1066; - break; - case 3: - dev_priv->mem_freq = 1333; - break; - } - DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); + if (val != dev_priv->rps.cur_freq) + vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + dev_priv->rps.cur_freq = val; + trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv, val)); +} - dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); +static void gen9_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + I915_WRITE(GEN6_RC_CONTROL, 0); +} - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; +static void 
gen6_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + I915_WRITE(GEN6_RC_CONTROL, 0); + I915_WRITE(GEN6_RPNSWREQ, 1 << 31); +} - mutex_unlock(&dev_priv->rps.hw_lock); +static void cherryview_disable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(GEN6_RC_CONTROL, 0); } -static void cherryview_init_gt_powersave(struct drm_device *dev) +static void valleyview_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - cherryview_setup_pctx(dev); + /* we're doing forcewake before Disabling RC6, + * This what the BIOS expects when going into suspend */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - mutex_lock(&dev_priv->rps.hw_lock); + I915_WRITE(GEN6_RC_CONTROL, 0); - val = vlv_punit_read(dev_priv, CCK_FUSE_REG); - switch ((val >> 2) & 0x7) { - case 0: - case 1: - dev_priv->rps.cz_freq = 200; - dev_priv->mem_freq = 1600; - break; - case 2: - dev_priv->rps.cz_freq = 267; - dev_priv->mem_freq = 1600; - break; - case 3: - dev_priv->rps.cz_freq = 333; - dev_priv->mem_freq = 2000; - break; - case 4: - dev_priv->rps.cz_freq = 320; - dev_priv->mem_freq = 1600; - break; - case 5: - dev_priv->rps.cz_freq = 400; - dev_priv->mem_freq = 1600; - break; + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); +} + +static void intel_print_rc6_info(struct drm_device *dev, u32 mode) +{ + if (IS_VALLEYVIEW(dev)) { + if (mode & (GEN7_RC_CTL_TO_MODE | GEN6_RC_CTL_EI_MODE(1))) + mode = GEN6_RC_CTL_RC6_ENABLE; + else + mode = 0; } - DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); + if (HAS_RC6p(dev)) + DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s RC6p %s RC6pp %s\n", + (mode & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", + (mode & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", + (mode & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off"); - dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); - dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; - DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), - dev_priv->rps.max_freq); + else + DRM_DEBUG_KMS("Enabling RC6 states: RC6 %s\n", + (mode & GEN6_RC_CTL_RC6_ENABLE) ? 
"on" : "off"); +} - dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); - DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); +static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6) +{ + /* No RC6 before Ironlake */ + if (INTEL_INFO(dev)->gen < 5) + return 0; - dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); - DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), - dev_priv->rps.rp1_freq); + /* RC6 is only on Ironlake mobile not on desktop */ + if (INTEL_INFO(dev)->gen == 5 && !IS_IRONLAKE_M(dev)) + return 0; - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); - DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), - dev_priv->rps.min_freq); + /* Respect the kernel parameter if it is set */ + if (enable_rc6 >= 0) { + int mask; - WARN_ONCE((dev_priv->rps.max_freq | - dev_priv->rps.efficient_freq | - dev_priv->rps.rp1_freq | - dev_priv->rps.min_freq) & 1, - "Odd GPU freq values\n"); + if (HAS_RC6p(dev)) + mask = INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE | + INTEL_RC6pp_ENABLE; + else + mask = INTEL_RC6_ENABLE; - /* Preserve min/max settings in case of re-init */ - if (dev_priv->rps.max_freq_softlimit == 0) - dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; + if ((enable_rc6 & mask) != enable_rc6) + DRM_DEBUG_KMS("Adjusting RC6 mask to %d (requested %d, valid %d)\n", + enable_rc6 & mask, enable_rc6, mask); - if (dev_priv->rps.min_freq_softlimit == 0) - dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + return enable_rc6 & mask; + } - mutex_unlock(&dev_priv->rps.hw_lock); + /* Disable RC6 on Ironlake */ + if (INTEL_INFO(dev)->gen == 5) + return 0; + + if (IS_IVYBRIDGE(dev)) + return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); + + return INTEL_RC6_ENABLE; } -static void valleyview_cleanup_gt_powersave(struct drm_device *dev) +int intel_enable_rc6(const struct drm_device *dev) { - valleyview_cleanup_pctx(dev); + return i915.enable_rc6; } -static void cherryview_enable_rps(struct drm_device *dev) +static void gen6_init_rps_frequencies(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; - u32 gtfifodbg, val, rc6_mode = 0, pcbr; - int i; + uint32_t rp_state_cap; + u32 ddcc_status = 0; + int ret; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + /* All of these values are in units of 50MHz */ + dev_priv->rps.cur_freq = 0; + /* static values from HW: RP0 > RP1 > RPn (min_freq) */ + dev_priv->rps.rp0_freq = (rp_state_cap >> 0) & 0xff; + dev_priv->rps.rp1_freq = (rp_state_cap >> 8) & 0xff; + dev_priv->rps.min_freq = (rp_state_cap >> 16) & 0xff; + /* hw_max = RP0 until we check for overclocking */ + dev_priv->rps.max_freq = dev_priv->rps.rp0_freq; - gtfifodbg = I915_READ(GTFIFODBG); - if (gtfifodbg) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); - I915_WRITE(GTFIFODBG, gtfifodbg); + dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = sandybridge_pcode_read(dev_priv, + HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL, + &ddcc_status); + if (0 == ret) + dev_priv->rps.efficient_freq = + (ddcc_status >> 8) & 0xff; } - cherryview_check_pctx(dev_priv); + /* Preserve min/max settings in case of re-init */ + if (dev_priv->rps.max_freq_softlimit == 0) + dev_priv->rps.max_freq_softlimit = 
dev_priv->rps.max_freq; - /* 1a & 1b: Get forcewake during program sequence. Although the driver + if (dev_priv->rps.min_freq_softlimit == 0) { + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + dev_priv->rps.min_freq_softlimit = + /* max(RPe, 450 MHz) */ + max(dev_priv->rps.efficient_freq, (u8) 9); + else + dev_priv->rps.min_freq_softlimit = + dev_priv->rps.min_freq; + } +} + +static void gen9_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + uint32_t rc6_mask = 0; + int unused; + + /* 1a: Software RC state - RC0 */ + I915_WRITE(GEN6_RC_STATE, 0); + + /* 1b: Get forcewake during program sequence. Although the driver * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - /* 2a: Program RC6 thresholds.*/ - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + /* 2a: Disable RC states. */ + I915_WRITE(GEN6_RC_CONTROL, 0); + + /* 2b: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ - - for_each_ring(ring, dev_priv, i) + for_each_ring(ring, dev_priv, unused) I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); I915_WRITE(GEN6_RC_SLEEP, 0); + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + /* 3a: Enable RC6 */ + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? + "on" : "off"); + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_EI_MODE(1) | + rc6_mask); - /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); - /* For now we assume BIOS is allocating and populating the PCBR */ - pcbr = I915_READ(VLV_PCBR); +} + +static void gen8_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + uint32_t rc6_mask = 0; + int unused; - DRM_DEBUG_DRIVER("PCBR offset : 0x%x\n", pcbr); + /* 1a: Software RC state - RC0 */ + I915_WRITE(GEN6_RC_STATE, 0); - /* 3: Enable RC6 */ - if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && - (pcbr >> VLV_PCBR_ADDR_SHIFT)) - rc6_mode = GEN6_RC_CTL_EI_MODE(1); + /* 1c & 1d: Get forcewake during program sequence. Although the driver + * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + /* 2a: Disable RC states. 
*/ + I915_WRITE(GEN6_RC_CONTROL, 0); + + /* Initialize rps frequencies */ + gen6_init_rps_frequencies(dev); + + /* 2b: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_ring(ring, dev_priv, unused) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + I915_WRITE(GEN6_RC_SLEEP, 0); + if (IS_BROADWELL(dev)) + I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */ + else + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* 3: Enable RC6 */ + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + intel_print_rc6_info(dev, rc6_mask); + if (IS_BROADWELL(dev)) + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN7_RC_CTL_TO_MODE | + rc6_mask); + else + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_EI_MODE(1) | + rc6_mask); /* 4 Program defaults and thresholds for RPS*/ - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); + I915_WRITE(GEN6_RPNSWREQ, + HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + I915_WRITE(GEN6_RC_VIDEO_FREQ, + HSW_FREQUENCY(dev_priv->rps.rp1_freq)); + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + /* Docs recommend 900MHz, and 300 MHz respectively */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, + dev_priv->rps.max_freq_softlimit << 24 | + dev_priv->rps.min_freq_softlimit << 16); - /* WaDisablePwrmtrEvent:chv (pre-production hw) */ - I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff); - I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00); + I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ + I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ + I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); /* 5: Enable RPS */ I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */ + GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? 
"yes" : "no"); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); - - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); - - valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + /* 6: Ring frequency + overclocking (our driver does this later */ - gen8_enable_rps_interrupts(dev); + dev_priv->rps.power = HIGH_POWER; /* force a reset */ + gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -static void valleyview_enable_rps(struct drm_device *dev) +static void gen6_enable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - u32 gtfifodbg, val, rc6_mode = 0; - int i; + u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; + u32 gtfifodbg; + int rc6_mode; + int i, ret; WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - valleyview_check_pctx(dev_priv); + /* Here begins a magic sequence of register writes to enable + * auto-downclocking. + * + * Perhaps there might be some value in exposing these to + * userspace... + */ + I915_WRITE(GEN6_RC_STATE, 0); + /* Clear the DBG now so we don't confuse earlier errors */ if ((gtfifodbg = I915_READ(GTFIFODBG))) { - DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", - gtfifodbg); + DRM_ERROR("GT fifo had a previous error %x\n", gtfifodbg); I915_WRITE(GTFIFODBG, gtfifodbg); } - /* If VLV, Forcewake all wells, else re-direct to regular path */ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); - I915_WRITE(GEN6_RP_UP_EI, 66000); - I915_WRITE(GEN6_RP_DOWN_EI, 350000); - - I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); + /* Initialize rps frequencies */ + gen6_init_rps_frequencies(dev); - I915_WRITE(GEN6_RP_CONTROL, - GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_NORMAL_MODE | - GEN6_RP_MEDIA_IS_GFX | - GEN6_RP_ENABLE | - GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); + /* disable the counters and set deterministic thresholds */ + I915_WRITE(GEN6_RC_CONTROL, 0); - I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16); + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30); + I915_WRITE(GEN6_RC6pp_WAKE_RATE_LIMIT, 30); I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); for_each_ring(ring, dev_priv, i) I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); - I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - - /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | - VLV_RENDER_RC0_COUNT_EN | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); - - if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) - rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; + I915_WRITE(GEN6_RC_SLEEP, 0); + I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); + if (IS_IVYBRIDGE(dev)) + I915_WRITE(GEN6_RC6_THRESHOLD, 125000); + else + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); + I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); + I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ - intel_print_rc6_info(dev, rc6_mode); + /* Check if we are enabling RC6 */ + rc6_mode = intel_enable_rc6(dev_priv->dev); + if (rc6_mode & INTEL_RC6_ENABLE) + rc6_mask |= 
GEN6_RC_CTL_RC6_ENABLE; - I915_WRITE(GEN6_RC_CONTROL, rc6_mode); + /* We don't use those on Haswell */ + if (!IS_HASWELL(dev)) { + if (rc6_mode & INTEL_RC6p_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + if (rc6_mode & INTEL_RC6pp_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + } - DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & 0x10 ? "yes" : "no"); - DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); + intel_print_rc6_info(dev, rc6_mask); - dev_priv->rps.cur_freq = (val >> 8) & 0xff; - DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), - dev_priv->rps.cur_freq); + I915_WRITE(GEN6_RC_CONTROL, + rc6_mask | + GEN6_RC_CTL_EI_MODE(1) | + GEN6_RC_CTL_HW_ENABLE); - DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", - vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), - dev_priv->rps.efficient_freq); + /* Power down if completely idle for over 50ms */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); + if (ret) + DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); - gen6_enable_rps_interrupts(dev); + ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); + if (!ret && (pcu_mbox & (1<<31))) { /* OC supported */ + DRM_DEBUG_DRIVER("Overclocking supported. Max: %dMHz, Overclock max: %dMHz\n", + (dev_priv->rps.max_freq_softlimit & 0xff) * 50, + (pcu_mbox & 0xff) * 50); + dev_priv->rps.max_freq = pcu_mbox & 0xff; + } - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); -} + dev_priv->rps.power = HIGH_POWER; /* force a reset */ + gen6_set_rps(dev_priv->dev, dev_priv->rps.min_freq_softlimit); -void ironlake_teardown_rc6(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (dev_priv->ips.renderctx) { - i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); - drm_gem_object_unreference(&dev_priv->ips.renderctx->base); - dev_priv->ips.renderctx = NULL; + rc6vids = 0; + ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); + if (IS_GEN6(dev) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. 
Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); } - if (dev_priv->ips.pwrctx) { - i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); - drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); - dev_priv->ips.pwrctx = NULL; - } + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -static void ironlake_disable_rc6(struct drm_device *dev) +static void __gen6_update_ring_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int min_freq = 15; + unsigned int gpu_freq; + unsigned int max_ia_freq, min_ring_freq; + int scaling_factor = 180; + struct cpufreq_policy *policy; - if (I915_READ(PWRCTXA)) { - /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ - I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); - wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON), - 50); + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - I915_WRITE(PWRCTXA, 0); - POSTING_READ(PWRCTXA); + policy = cpufreq_cpu_get(0); + if (policy) { + max_ia_freq = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } else { + /* + * Default to measured freq if none found, PCU will ensure we + * don't go over + */ + max_ia_freq = tsc_khz; + } - I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); - POSTING_READ(RSTDBYCTL); + /* Convert from kHz to MHz */ + max_ia_freq /= 1000; + + min_ring_freq = I915_READ(DCLK) & 0xf; + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + min_ring_freq = mult_frac(min_ring_freq, 8, 3); + + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. We do this by specifying the IA frequency + * the PCU should use as a reference to determine the ring frequency. + */ + for (gpu_freq = dev_priv->rps.max_freq; gpu_freq >= dev_priv->rps.min_freq; + gpu_freq--) { + int diff = dev_priv->rps.max_freq - gpu_freq; + unsigned int ia_freq = 0, ring_freq = 0; + + if (INTEL_INFO(dev)->gen >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(min_ring_freq, gpu_freq); + } else if (IS_HASWELL(dev)) { + ring_freq = mult_frac(gpu_freq, 5, 4); + ring_freq = max(min_ring_freq, ring_freq); + /* leave ia_freq as the default, chosen by cpufreq */ + } else { + /* On older processors, there is no separate ring + * clock domain, so in order to boost the bandwidth + * of the ring, we need to upclock the CPU (ia_freq). + * + * For GPU frequencies less than 750MHz, + * just use the lowest ring freq. 
+ */ + if (gpu_freq < min_freq) + ia_freq = 800; + else + ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + } + + sandybridge_pcode_write(dev_priv, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT | + ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT | + gpu_freq); } } -static int ironlake_setup_rc6(struct drm_device *dev) +void gen6_update_ring_freq(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->ips.renderctx == NULL) - dev_priv->ips.renderctx = intel_alloc_context_page(dev); - if (!dev_priv->ips.renderctx) - return -ENOMEM; - - if (dev_priv->ips.pwrctx == NULL) - dev_priv->ips.pwrctx = intel_alloc_context_page(dev); - if (!dev_priv->ips.pwrctx) { - ironlake_teardown_rc6(dev); - return -ENOMEM; - } + if (INTEL_INFO(dev)->gen < 6 || IS_VALLEYVIEW(dev)) + return; - return 0; + mutex_lock(&dev_priv->rps.hw_lock); + __gen6_update_ring_freq(dev); + mutex_unlock(&dev_priv->rps.hw_lock); } -static void ironlake_enable_rc6(struct drm_device *dev) +static int cherryview_rps_max_freq(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring = &dev_priv->ring[RCS]; - bool was_interruptible; - int ret; + u32 val, rp0; - /* rc6 disabled by default due to repeated reports of hanging during - * boot and resume. - */ - if (!intel_enable_rc6(dev)) - return; + val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); + rp0 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + return rp0; +} - ret = ironlake_setup_rc6(dev); - if (ret) - return; +static int cherryview_rps_rpe_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpe; - was_interruptible = dev_priv->mm.interruptible; - dev_priv->mm.interruptible = false; + val = vlv_punit_read(dev_priv, PUNIT_GPU_DUTYCYCLE_REG); + rpe = (val >> PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT) & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK; - /* - * GPU can automatically power down the render unit if given a page - * to save state. - */ - ret = intel_ring_begin(ring, 6); - if (ret) { - ironlake_teardown_rc6(dev); - dev_priv->mm.interruptible = was_interruptible; - return; - } + return rpe; +} - intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) | - MI_MM_SPACE_GTT | - MI_SAVE_EXT_STATE_EN | - MI_RESTORE_EXT_STATE_EN | - MI_RESTORE_INHIBIT); - intel_ring_emit(ring, MI_SUSPEND_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_advance(ring); +static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rp1; - /* - * Wait for the command parser to advance past MI_SET_CONTEXT. 
The HW - * does an implicit flush, combined with MI_FLUSH above, it should be - * safe to assume that renderctx is valid - */ - ret = intel_ring_idle(ring); - dev_priv->mm.interruptible = was_interruptible; - if (ret) { - DRM_ERROR("failed to enable ironlake power savings\n"); - ironlake_teardown_rc6(dev); - return; - } + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + rp1 = (val >> PUNIT_GPU_STATUS_MAX_FREQ_SHIFT) & PUNIT_GPU_STATUS_MAX_FREQ_MASK; - I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); - I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); + return rp1; +} - intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); +static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); + rpn = (val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK; + return rpn; } -static unsigned long intel_pxfreq(u32 vidfreq) +static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { - unsigned long freq; - int div = (vidfreq & 0x3f0000) >> 16; - int post = (vidfreq & 0x3000) >> 12; - int pre = (vidfreq & 0x7); + u32 val, rp1; - if (!pre) - return 0; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - freq = ((div * 133333) / ((1<> FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT; - return freq; + return rp1; } -static const struct cparams { - u16 i; - u16 t; - u16 m; - u16 c; -} cparams[] = { - { 1, 1333, 301, 28664 }, - { 1, 1066, 294, 24460 }, - { 1, 800, 294, 25192 }, - { 0, 1333, 276, 27605 }, - { 0, 1066, 276, 27605 }, - { 0, 800, 231, 23784 }, -}; - -static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) +static int valleyview_rps_max_freq(struct drm_i915_private *dev_priv) { - u64 total_count, diff, ret; - u32 count1, count2, count3, m = 0, c = 0; - unsigned long now = jiffies_to_msecs(jiffies), diff1; - int i; + u32 val, rp0; - assert_spin_locked(&mchdev_lock); + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FREQ_FUSE); - diff1 = now - dev_priv->ips.last_time1; + rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT; + /* Clamp to max */ + rp0 = min_t(u32, rp0, 0xea); - /* Prevent division-by-zero if we are asking too fast. - * Also, we don't get interesting results if we are polling - * faster than once in 10ms, so just return the saved value - * in such cases. 
- */ - if (diff1 <= 10) - return dev_priv->ips.chipset_power; + return rp0; +} - count1 = I915_READ(DMIEC); - count2 = I915_READ(DDREC); - count3 = I915_READ(CSIEC); +static int valleyview_rps_rpe_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpe; - total_count = count1 + count2 + count3; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_LO); + rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT; + val = vlv_nc_read(dev_priv, IOSF_NC_FB_GFX_FMAX_FUSE_HI); + rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5; - /* FIXME: handle per-counter overflow */ - if (total_count < dev_priv->ips.last_count1) { - diff = ~0UL - dev_priv->ips.last_count1; - diff += total_count; - } else { - diff = total_count - dev_priv->ips.last_count1; - } - - for (i = 0; i < ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == dev_priv->ips.c_m && - cparams[i].t == dev_priv->ips.r_t) { - m = cparams[i].m; - c = cparams[i].c; - break; - } - } - - diff = div_u64(diff, diff1); - ret = ((m * diff) + c); - ret = div_u64(ret, 10); - - dev_priv->ips.last_count1 = total_count; - dev_priv->ips.last_time1 = now; - - dev_priv->ips.chipset_power = ret; - - return ret; + return rpe; } -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) +static int valleyview_rps_min_freq(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; - unsigned long val; + return vlv_punit_read(dev_priv, PUNIT_REG_GPU_LFM) & 0xff; +} - if (INTEL_INFO(dev)->gen != 5) - return 0; +/* Check that the pctx buffer wasn't move under us. */ +static void valleyview_check_pctx(struct drm_i915_private *dev_priv) +{ + unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - spin_lock_irq(&mchdev_lock); + WARN_ON(pctx_addr != dev_priv->mm.stolen_base + + dev_priv->vlv_pctx->stolen->start); +} - val = __i915_chipset_val(dev_priv); - spin_unlock_irq(&mchdev_lock); +/* Check that the pcbr address is not empty. 
*/ +static void cherryview_check_pctx(struct drm_i915_private *dev_priv) +{ + unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - return val; + WARN_ON((pctx_addr >> VLV_PCBR_ADDR_SHIFT) == 0); } -unsigned long i915_mch_val(struct drm_i915_private *dev_priv) +static void cherryview_setup_pctx(struct drm_device *dev) { - unsigned long m, x, b; - u32 tsfs; - - tsfs = I915_READ(TSFS); + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long pctx_paddr, paddr; + struct i915_gtt *gtt = &dev_priv->gtt; + u32 pcbr; + int pctx_size = 32*1024; - m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); - x = I915_READ8(TR1); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - b = tsfs & TSFS_INTR_MASK; + pcbr = I915_READ(VLV_PCBR); + if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); + paddr = (dev_priv->mm.stolen_base + + (gtt->stolen_size - pctx_size)); - return ((m * x) / 127) - b; -} + pctx_paddr = (paddr & (~4095)); + I915_WRITE(VLV_PCBR, pctx_paddr); + } -static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) -{ - struct drm_device *dev = dev_priv->dev; - static const struct v_table { - u16 vd; /* in .1 mil */ - u16 vm; /* in .1 mil */ - } v_table[] = { - { 0, 0, }, - { 375, 0, }, - { 500, 0, }, - { 625, 0, }, - { 750, 0, }, - { 875, 0, }, - { 1000, 0, }, - { 1125, 0, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4125, 3000, }, - { 4250, 3125, }, - { 4375, 3250, }, - { 4500, 3375, }, - { 4625, 3500, }, - { 4750, 3625, }, - { 4875, 3750, }, - { 5000, 3875, }, - { 5125, 4000, }, - { 5250, 4125, }, - { 5375, 4250, }, - { 5500, 4375, }, - { 5625, 4500, }, - { 5750, 4625, }, - { 5875, 4750, }, - { 6000, 4875, }, - { 6125, 5000, }, - { 6250, 5125, }, - { 6375, 5250, }, - { 6500, 5375, }, - { 6625, 5500, }, - { 6750, 5625, }, - { 6875, 5750, }, - { 7000, 5875, }, - { 7125, 6000, }, - { 7250, 6125, }, - { 7375, 6250, }, - { 7500, 6375, }, - { 7625, 6500, }, - { 7750, 6625, }, - { 7875, 6750, }, - { 8000, 6875, }, - { 8125, 7000, }, - { 8250, 7125, }, - { 8375, 7250, }, - { 8500, 7375, }, - { 8625, 7500, }, - { 8750, 7625, }, - { 8875, 7750, }, - { 9000, 7875, }, - { 9125, 8000, }, - { 9250, 8125, }, - { 9375, 8250, }, - { 9500, 8375, }, - { 9625, 8500, }, - { 9750, 8625, }, - { 9875, 8750, }, - { 10000, 8875, }, - { 10125, 9000, }, - { 10250, 9125, }, - { 10375, 9250, }, - { 10500, 9375, }, - { 10625, 9500, }, - { 10750, 9625, }, - { 10875, 9750, }, - { 11000, 9875, }, - { 11125, 10000, }, - { 11250, 10125, }, - { 11375, 10250, }, - { 11500, 10375, }, - { 11625, 10500, }, - { 11750, 10625, }, - { 11875, 10750, }, - { 12000, 10875, }, - { 12125, 11000, }, - { 12250, 11125, }, - { 12375, 11250, }, - { 12500, 11375, }, - { 12625, 11500, }, - { 12750, 11625, }, - { 12875, 11750, }, - { 13000, 11875, }, - { 13125, 12000, }, - { 13250, 12125, }, - { 13375, 12250, }, - { 13500, 12375, }, - { 13625, 12500, }, - { 13750, 12625, }, - { 13875, 12750, }, - { 14000, 12875, }, - { 14125, 13000, }, - { 14250, 13125, }, - { 14375, 13250, }, - { 14500, 13375, }, - { 14625, 13500, }, - { 14750, 13625, }, - { 14875, 13750, }, - { 15000, 13875, }, - { 
15125, 14000, }, - { 15250, 14125, }, - { 15375, 14250, }, - { 15500, 14375, }, - { 15625, 14500, }, - { 15750, 14625, }, - { 15875, 14750, }, - { 16000, 14875, }, - { 16125, 15000, }, - }; - if (INTEL_INFO(dev)->is_mobile) - return v_table[pxvid].vm; - else - return v_table[pxvid].vd; + DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); } -static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) +static void valleyview_setup_pctx(struct drm_device *dev) { - u64 now, diff, diffms; - u32 count; + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_object *pctx; + unsigned long pctx_paddr; + u32 pcbr; + int pctx_size = 24*1024; - assert_spin_locked(&mchdev_lock); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - now = ktime_get_raw_ns(); - diffms = now - dev_priv->ips.last_time2; - do_div(diffms, NSEC_PER_MSEC); + pcbr = I915_READ(VLV_PCBR); + if (pcbr) { + /* BIOS set it up already, grab the pre-alloc'd space */ + int pcbr_offset; - /* Don't divide by 0 */ - if (!diffms) - return; + pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; + pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev, + pcbr_offset, + I915_GTT_OFFSET_NONE, + pctx_size); + goto out; + } - count = I915_READ(GFXEC); + DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - if (count < dev_priv->ips.last_count2) { - diff = ~0UL - dev_priv->ips.last_count2; - diff += count; - } else { - diff = count - dev_priv->ips.last_count2; + /* + * From the Gunit register HAS: + * The Gfx driver is expected to program this register and ensure + * proper allocation within Gfx stolen memory. For example, this + * register should be programmed such than the PCBR range does not + * overlap with other ranges, such as the frame buffer, protected + * memory, or any other relevant ranges. + */ + pctx = i915_gem_object_create_stolen(dev, pctx_size); + if (!pctx) { + DRM_DEBUG("not enough stolen space for PCTX, disabling\n"); + return; } - dev_priv->ips.last_count2 = count; - dev_priv->ips.last_time2 = now; + pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; + I915_WRITE(VLV_PCBR, pctx_paddr); - /* More magic constants... 
*/ - diff = diff * 1181; - diff = div_u64(diff, diffms * 10); - dev_priv->ips.gfx_power = diff; +out: + DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); + dev_priv->vlv_pctx = pctx; } -void i915_update_gfx_val(struct drm_i915_private *dev_priv) +static void valleyview_cleanup_pctx(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; + struct drm_i915_private *dev_priv = dev->dev_private; - if (INTEL_INFO(dev)->gen != 5) + if (WARN_ON(!dev_priv->vlv_pctx)) return; - spin_lock_irq(&mchdev_lock); - - __i915_update_gfx_val(dev_priv); - - spin_unlock_irq(&mchdev_lock); + drm_gem_object_unreference(&dev_priv->vlv_pctx->base); + dev_priv->vlv_pctx = NULL; } -static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) +static void valleyview_init_gt_powersave(struct drm_device *dev) { - unsigned long t, corr, state1, corr2, state2; - u32 pxvid, ext_v; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; - assert_spin_locked(&mchdev_lock); + valleyview_setup_pctx(dev); - pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4)); - pxvid = (pxvid >> 24) & 0x7f; - ext_v = pvid_to_extvid(dev_priv, pxvid); + mutex_lock(&dev_priv->rps.hw_lock); - state1 = ext_v; + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); + switch ((val >> 6) & 3) { + case 0: + case 1: + dev_priv->mem_freq = 800; + break; + case 2: + dev_priv->mem_freq = 1066; + break; + case 3: + dev_priv->mem_freq = 1333; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - t = i915_mch_val(dev_priv); + dev_priv->rps.max_freq = valleyview_rps_max_freq(dev_priv); + dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), + dev_priv->rps.max_freq); - /* Revel in the empirically derived constants */ + dev_priv->rps.efficient_freq = valleyview_rps_rpe_freq(dev_priv); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); - /* Correction factor in 1/100000 units */ - if (t > 80) - corr = ((t * 2349) + 135940); - else if (t >= 50) - corr = ((t * 964) + 29317); - else /* < 50 */ - corr = ((t * 301) + 1004); + dev_priv->rps.rp1_freq = valleyview_rps_guar_freq(dev_priv); + DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), + dev_priv->rps.rp1_freq); - corr = corr * ((150142 * state1) / 10000 - 78642); - corr /= 100000; - corr2 = (corr * dev_priv->ips.corr); + dev_priv->rps.min_freq = valleyview_rps_min_freq(dev_priv); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), + dev_priv->rps.min_freq); - state2 = (corr2 * state1) / 10000; - state2 /= 100; /* convert to mW */ + /* Preserve min/max settings in case of re-init */ + if (dev_priv->rps.max_freq_softlimit == 0) + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - __i915_update_gfx_val(dev_priv); + if (dev_priv->rps.min_freq_softlimit == 0) + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; - return dev_priv->ips.gfx_power + state2; + mutex_unlock(&dev_priv->rps.hw_lock); } -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) +static void cherryview_init_gt_powersave(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - unsigned long val; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val; - if (INTEL_INFO(dev)->gen != 5) - return 0; + cherryview_setup_pctx(dev); - 
spin_lock_irq(&mchdev_lock); + mutex_lock(&dev_priv->rps.hw_lock); - val = __i915_gfx_val(dev_priv); + mutex_lock(&dev_priv->dpio_lock); + val = vlv_cck_read(dev_priv, CCK_FUSE_REG); + mutex_unlock(&dev_priv->dpio_lock); - spin_unlock_irq(&mchdev_lock); + switch ((val >> 2) & 0x7) { + case 0: + case 1: + dev_priv->rps.cz_freq = 200; + dev_priv->mem_freq = 1600; + break; + case 2: + dev_priv->rps.cz_freq = 267; + dev_priv->mem_freq = 1600; + break; + case 3: + dev_priv->rps.cz_freq = 333; + dev_priv->mem_freq = 2000; + break; + case 4: + dev_priv->rps.cz_freq = 320; + dev_priv->mem_freq = 1600; + break; + case 5: + dev_priv->rps.cz_freq = 400; + dev_priv->mem_freq = 1600; + break; + } + DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", dev_priv->mem_freq); - return val; -} + dev_priv->rps.max_freq = cherryview_rps_max_freq(dev_priv); + dev_priv->rps.rp0_freq = dev_priv->rps.max_freq; + DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.max_freq), + dev_priv->rps.max_freq); -/** - * i915_read_mch_val - return value for IPS use - * - * Calculate and return a value for the IPS driver to use when deciding whether - * we have thermal and power headroom to increase CPU or GPU power budget. - */ -unsigned long i915_read_mch_val(void) -{ - struct drm_i915_private *dev_priv; - unsigned long chipset_val, graphics_val, ret = 0; + dev_priv->rps.efficient_freq = cherryview_rps_rpe_freq(dev_priv); + DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; + dev_priv->rps.rp1_freq = cherryview_rps_guar_freq(dev_priv); + DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), + dev_priv->rps.rp1_freq); - chipset_val = __i915_chipset_val(dev_priv); - graphics_val = __i915_gfx_val(dev_priv); + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.min_freq), + dev_priv->rps.min_freq); - ret = chipset_val + graphics_val; + WARN_ONCE((dev_priv->rps.max_freq | + dev_priv->rps.efficient_freq | + dev_priv->rps.rp1_freq | + dev_priv->rps.min_freq) & 1, + "Odd GPU freq values\n"); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* Preserve min/max settings in case of re-init */ + if (dev_priv->rps.max_freq_softlimit == 0) + dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq; - return ret; + if (dev_priv->rps.min_freq_softlimit == 0) + dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq; + + mutex_unlock(&dev_priv->rps.hw_lock); } -EXPORT_SYMBOL_GPL(i915_read_mch_val); -/** - * i915_gpu_raise - raise GPU frequency limit - * - * Raise the limit; IPS indicates we have thermal headroom. - */ -bool i915_gpu_raise(void) +static void valleyview_cleanup_gt_powersave(struct drm_device *dev) { - struct drm_i915_private *dev_priv; - bool ret = true; - - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; - - if (dev_priv->ips.max_delay > dev_priv->ips.fmax) - dev_priv->ips.max_delay--; - -out_unlock: - spin_unlock_irq(&mchdev_lock); - - return ret; + valleyview_cleanup_pctx(dev); } -EXPORT_SYMBOL_GPL(i915_gpu_raise); -/** - * i915_gpu_lower - lower GPU frequency limit - * - * IPS indicates we're close to a thermal limit, so throttle back the GPU - * frequency maximum. 
- */ -bool i915_gpu_lower(void) +static void cherryview_enable_rps(struct drm_device *dev) { - struct drm_i915_private *dev_priv; - bool ret = true; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + u32 gtfifodbg, val, rc6_mode = 0, pcbr; + int i; - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + gtfifodbg = I915_READ(GTFIFODBG); + if (gtfifodbg) { + DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", + gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); } - dev_priv = i915_mch_dev; - if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) - dev_priv->ips.max_delay++; + cherryview_check_pctx(dev_priv); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* 1a & 1b: Get forcewake during program sequence. Although the driver + * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_lower); + /* 2a: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ -/** - * i915_gpu_busy - indicate GPU business to IPS - * - * Tell the IPS driver whether or not the GPU is busy. - */ -bool i915_gpu_busy(void) -{ - struct drm_i915_private *dev_priv; - struct intel_engine_cs *ring; - bool ret = false; - int i; + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + I915_WRITE(GEN6_RC_SLEEP, 0); - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) - goto out_unlock; - dev_priv = i915_mch_dev; + /* TO threshold set to 1750 us ( 0x557 * 1.28 us) */ + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - for_each_ring(ring, dev_priv, i) - ret |= !list_empty(&ring->request_list); + /* allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* For now we assume BIOS is allocating and populating the PCBR */ + pcbr = I915_READ(VLV_PCBR); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_busy); + /* 3: Enable RC6 */ + if ((intel_enable_rc6(dev) & INTEL_RC6_ENABLE) && + (pcbr >> VLV_PCBR_ADDR_SHIFT)) + rc6_mode = GEN7_RC_CTL_TO_MODE; -/** - * i915_gpu_turbo_disable - disable graphics turbo - * - * Disable graphics turbo by resetting the max frequency and setting the - * current frequency to the default. 
- */ -bool i915_gpu_turbo_disable(void) -{ - struct drm_i915_private *dev_priv; - bool ret = true; + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - spin_lock_irq(&mchdev_lock); - if (!i915_mch_dev) { - ret = false; - goto out_unlock; - } - dev_priv = i915_mch_dev; + /* 4 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); - dev_priv->ips.max_delay = dev_priv->ips.fstart; + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); - if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) - ret = false; + /* WaDisablePwrmtrEvent:chv (pre-production hw) */ + I915_WRITE(0xA80C, I915_READ(0xA80C) & 0x00ffffff); + I915_WRITE(0xA810, I915_READ(0xA810) & 0xffffff00); -out_unlock: - spin_unlock_irq(&mchdev_lock); + /* 5: Enable RPS */ + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | /* WaSetMaskForGfxBusyness:chv (pre-production hw ?) */ + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); - return ret; -} -EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); -/** - * Tells the intel_ips driver that the i915 driver is now loaded, if - * IPS got loaded first. - * - * This awkward dance is so that neither module has to depend on the - * other in order for IPS to do the appropriate communication of - * GPU turbo limits to i915. - */ -static void -ips_ping_for_i915_load(void) -{ - void (*link)(void); + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - link = symbol_get(ips_link_to_i915_driver); - if (link) { - link(); - symbol_put(ips_link_to_i915_driver); - } -} + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no"); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); -void intel_gpu_ips_init(struct drm_i915_private *dev_priv) -{ - /* We only register the i915 ips part with intel-ips once everything is - * set up, to avoid intel-ips sneaking in and reading bogus values. 
*/ - spin_lock_irq(&mchdev_lock); - i915_mch_dev = dev_priv; - spin_unlock_irq(&mchdev_lock); + dev_priv->rps.cur_freq = (val >> 8) & 0xff; + DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + dev_priv->rps.cur_freq); - ips_ping_for_i915_load(); -} + DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); -void intel_gpu_ips_teardown(void) -{ - spin_lock_irq(&mchdev_lock); - i915_mch_dev = NULL; - spin_unlock_irq(&mchdev_lock); + valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -static void intel_init_emon(struct drm_device *dev) +static void valleyview_enable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 lcfuse; - u8 pxw[16]; + struct intel_engine_cs *ring; + u32 gtfifodbg, val, rc6_mode = 0; int i; - /* Disable to program */ - I915_WRITE(ECR, 0); - POSTING_READ(ECR); - - /* Program energy weights for various events */ - I915_WRITE(SDEW, 0x15040d00); - I915_WRITE(CSIEW0, 0x007f0000); - I915_WRITE(CSIEW1, 0x1e220004); - I915_WRITE(CSIEW2, 0x04000004); + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - for (i = 0; i < 5; i++) - I915_WRITE(PEW + (i * 4), 0); - for (i = 0; i < 3; i++) - I915_WRITE(DEW + (i * 4), 0); + valleyview_check_pctx(dev_priv); - /* Program P-state weights to account for frequency power adjustment */ - for (i = 0; i < 16; i++) { - u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4)); - unsigned long freq = intel_pxfreq(pxvidfreq); - unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> - PXVFREQ_PX_SHIFT; - unsigned long val; - - val = vid * vid; - val *= (freq / 1000); - val *= 255; - val /= (127*127*900); - if (val > 0xff) - DRM_ERROR("bad pxval: %ld\n", val); - pxw[i] = val; - } - /* Render standby states get 0 weight */ - pxw[14] = 0; - pxw[15] = 0; - - for (i = 0; i < 4; i++) { - u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | - (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); - I915_WRITE(PXW + (i * 4), val); - } - - /* Adjust magic regs to magic values (more experimental results) */ - I915_WRITE(OGW0, 0); - I915_WRITE(OGW1, 0); - I915_WRITE(EG0, 0x00007f00); - I915_WRITE(EG1, 0x0000000e); - I915_WRITE(EG2, 0x000e0000); - I915_WRITE(EG3, 0x68000300); - I915_WRITE(EG4, 0x42000000); - I915_WRITE(EG5, 0x00140031); - I915_WRITE(EG6, 0); - I915_WRITE(EG7, 0); - - for (i = 0; i < 8; i++) - I915_WRITE(PXWL + (i * 4), 0); - - /* Enable PMON + select events */ - I915_WRITE(ECR, 0x80000019); - - lcfuse = I915_READ(LCFUSE02); - - dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); -} - -void intel_init_gt_powersave(struct drm_device *dev) -{ - i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); - - if (IS_CHERRYVIEW(dev)) - cherryview_init_gt_powersave(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_init_gt_powersave(dev); -} - -void intel_cleanup_gt_powersave(struct drm_device *dev) -{ - if (IS_CHERRYVIEW(dev)) - return; - else if (IS_VALLEYVIEW(dev)) - valleyview_cleanup_gt_powersave(dev); -} - -/** - * intel_suspend_gt_powersave - suspend PM work and helper threads - * @dev: drm device - * - * We don't want to disable RC6 or other features here, we just want - * to make sure any work we've queued has finished and won't bother - * us while we're suspended. 
- */ -void intel_suspend_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Interrupts should be disabled already to avoid re-arming. */ - WARN_ON(intel_irqs_enabled(dev_priv)); - - flush_delayed_work(&dev_priv->rps.delayed_resume_work); - - cancel_work_sync(&dev_priv->rps.work); - - /* Force GPU to min freq during suspend */ - gen6_rps_idle(dev_priv); -} - -void intel_disable_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Interrupts should be disabled already to avoid re-arming. */ - WARN_ON(intel_irqs_enabled(dev_priv)); - - if (IS_IRONLAKE_M(dev)) { - ironlake_disable_drps(dev); - ironlake_disable_rc6(dev); - } else if (INTEL_INFO(dev)->gen >= 6) { - intel_suspend_gt_powersave(dev); - - mutex_lock(&dev_priv->rps.hw_lock); - if (IS_CHERRYVIEW(dev)) - cherryview_disable_rps(dev); - else if (IS_VALLEYVIEW(dev)) - valleyview_disable_rps(dev); - else - gen6_disable_rps(dev); - dev_priv->rps.enabled = false; - mutex_unlock(&dev_priv->rps.hw_lock); - } -} - -static void intel_gen6_powersave_work(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - rps.delayed_resume_work.work); - struct drm_device *dev = dev_priv->dev; - - mutex_lock(&dev_priv->rps.hw_lock); - - if (IS_CHERRYVIEW(dev)) { - cherryview_enable_rps(dev); - } else if (IS_VALLEYVIEW(dev)) { - valleyview_enable_rps(dev); - } else if (IS_BROADWELL(dev)) { - gen8_enable_rps(dev); - __gen6_update_ring_freq(dev); - } else { - gen6_enable_rps(dev); - __gen6_update_ring_freq(dev); - } - dev_priv->rps.enabled = true; - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_runtime_pm_put(dev_priv); -} - -void intel_enable_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (IS_IRONLAKE_M(dev)) { - mutex_lock(&dev->struct_mutex); - ironlake_enable_drps(dev); - ironlake_enable_rc6(dev); - intel_init_emon(dev); - mutex_unlock(&dev->struct_mutex); - } else if (INTEL_INFO(dev)->gen >= 6) { - /* - * PCU communication is slow and this doesn't need to be - * done at any specific time, so do this out of our fast path - * to make resume and init faster. - * - * We depend on the HW RC6 power context save/restore - * mechanism when entering D3 through runtime PM suspend. So - * disable RPM until RPS/RC6 is properly setup. We can only - * get here via the driver load/system resume/runtime resume - * paths, so the _noresume version is enough (and in case of - * runtime resume it's necessary). - */ - if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, - round_jiffies_up_relative(HZ))) - intel_runtime_pm_get_noresume(dev_priv); + if ((gtfifodbg = I915_READ(GTFIFODBG))) { + DRM_DEBUG_DRIVER("GT fifo had a previous error %x\n", + gtfifodbg); + I915_WRITE(GTFIFODBG, gtfifodbg); } -} - -void intel_reset_gt_powersave(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - dev_priv->rps.enabled = false; - intel_enable_gt_powersave(dev); -} -static void ibx_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* - * On Ibex Peak and Cougar Point, we need to disable clock - * gating for the panel power sequencer or it will fail to - * start up when no ports are active. 
- */ - I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); -} - -static void g4x_disable_trickle_feed(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; + /* If VLV, Forcewake all wells, else re-direct to regular path */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); - for_each_pipe(dev_priv, pipe) { - I915_WRITE(DSPCNTR(pipe), - I915_READ(DSPCNTR(pipe)) | - DISPPLANE_TRICKLE_FEED_DISABLE); - intel_flush_primary_plane(dev_priv, pipe); - } -} + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); -static void ilk_init_lp_watermarks(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); - I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_CONT); - /* - * Don't touch WM1S_LP_EN here. - * Doing so could cause underruns. - */ -} + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); -static void ironlake_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; + for_each_ring(ring, dev_priv, i) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); - /* - * Required for FBC - * WaFbcDisableDpfcClockGating:ilk - */ - dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | - ILK_DPFCUNIT_CLOCK_GATE_DISABLE | - ILK_DPFDUNIT_CLOCK_GATE_ENABLE; + I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); - I915_WRITE(PCH_3DCGDIS0, - MARIUNIT_CLOCK_GATE_DISABLE | - SVSMUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(PCH_3DCGDIS1, - VFMUNIT_CLOCK_GATE_DISABLE); + /* allows RC6 residency counter to work */ + I915_WRITE(VLV_COUNTER_CONTROL, + _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN | + VLV_RENDER_RC0_COUNT_EN | + VLV_MEDIA_RC6_COUNT_EN | + VLV_RENDER_RC6_COUNT_EN)); - /* - * According to the spec the following bits should be set in - * order to enable memory self-refresh - * The bit 22/21 of 0x42004 - * The bit 5 of 0x42020 - * The bit 15 of 0x45000 - */ - I915_WRITE(ILK_DISPLAY_CHICKEN2, - (I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE | ILK_VSDPFD_FULL)); - dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; - I915_WRITE(DISP_ARB_CTL, - (I915_READ(DISP_ARB_CTL) | - DISP_FBC_WM_DIS)); + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; - ilk_init_lp_watermarks(dev); + intel_print_rc6_info(dev, rc6_mode); - /* - * Based on the document from hardware guys the following bits - * should be set unconditionally in order to enable FBC. - * The bit 22 of 0x42000 - * The bit 22 of 0x42004 - * The bit 7,8,9 of 0x42020. 
- */ - if (IS_IRONLAKE_M(dev)) { - /* WaFbcAsynchFlipDisableFbcQueue:ilk */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE); - } + I915_WRITE(GEN6_RC_CONTROL, rc6_mode); - I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); + val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); - I915_WRITE(_3D_CHICKEN2, - _3D_CHICKEN2_WM_READ_PIPELINED << 16 | - _3D_CHICKEN2_WM_READ_PIPELINED); + /* RPS code assumes GPLL is used */ + WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n"); - /* WaDisableRenderCachePipelinedFlush:ilk */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); + DRM_DEBUG_DRIVER("GPLL enabled? %s\n", val & GPLLENABLE ? "yes" : "no"); + DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val); - /* WaDisable_RenderCache_OperationalFlush:ilk */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + dev_priv->rps.cur_freq = (val >> 8) & 0xff; + DRM_DEBUG_DRIVER("current GPU freq: %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + dev_priv->rps.cur_freq); - g4x_disable_trickle_feed(dev); + DRM_DEBUG_DRIVER("setting GPU freq to %d MHz (%u)\n", + vlv_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + dev_priv->rps.efficient_freq); - ibx_init_clock_gating(dev); + valleyview_set_rps(dev_priv->dev, dev_priv->rps.efficient_freq); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } -static void cpt_init_clock_gating(struct drm_device *dev) +void ironlake_teardown_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; - uint32_t val; - /* - * On Ibex Peak and Cougar Point, we need to disable clock - * gating for the panel power sequencer or it will fail to - * start up when no ports are active. - */ - I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | - PCH_DPLUNIT_CLOCK_GATE_DISABLE | - PCH_CPUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | - DPLS_EDP_PPS_FIX_DIS); - /* The below fixes the weird display corruption, a few pixels shifted - * downward, on (only) LVDS of some HP laptops with IVY. 
- */ - for_each_pipe(dev_priv, pipe) { - val = I915_READ(TRANS_CHICKEN2(pipe)); - val |= TRANS_CHICKEN2_TIMING_OVERRIDE; - val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - if (dev_priv->vbt.fdi_rx_polarity_inverted) - val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; - val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; - val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; - val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; - I915_WRITE(TRANS_CHICKEN2(pipe), val); + if (dev_priv->ips.renderctx) { + i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx); + drm_gem_object_unreference(&dev_priv->ips.renderctx->base); + dev_priv->ips.renderctx = NULL; } - /* WADP0ClockGatingDisable */ - for_each_pipe(dev_priv, pipe) { - I915_WRITE(TRANS_CHICKEN1(pipe), - TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + + if (dev_priv->ips.pwrctx) { + i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx); + drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); + dev_priv->ips.pwrctx = NULL; } } -static void gen6_check_mch_setup(struct drm_device *dev) +static void ironlake_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t tmp; - tmp = I915_READ(MCH_SSKPD); - if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) - DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", - tmp); + if (I915_READ(PWRCTXA)) { + /* Wake the GPU, prevent RC6, then restore RSTDBYCTL */ + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) | RCX_SW_EXIT); + wait_for(((I915_READ(RSTDBYCTL) & RSX_STATUS_MASK) == RSX_STATUS_ON), + 50); + + I915_WRITE(PWRCTXA, 0); + POSTING_READ(PWRCTXA); + + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); + POSTING_READ(RSTDBYCTL); + } } -static void gen6_init_clock_gating(struct drm_device *dev) +static int ironlake_setup_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); + if (dev_priv->ips.renderctx == NULL) + dev_priv->ips.renderctx = intel_alloc_context_page(dev); + if (!dev_priv->ips.renderctx) + return -ENOMEM; - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_ELPIN_409_SELECT); + if (dev_priv->ips.pwrctx == NULL) + dev_priv->ips.pwrctx = intel_alloc_context_page(dev); + if (!dev_priv->ips.pwrctx) { + ironlake_teardown_rc6(dev); + return -ENOMEM; + } - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - I915_WRITE(_3D_CHICKEN, - _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + return 0; +} - /* WaDisable_RenderCache_OperationalFlush:snb */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +static void ironlake_enable_rc6(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + bool was_interruptible; + int ret; - /* - * BSpec recoomends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + /* rc6 disabled by default due to repeated reports of hanging during + * boot and resume. 
*/ - I915_WRITE(GEN6_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - ilk_init_lp_watermarks(dev); - - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - - I915_WRITE(GEN6_UCGCTL1, - I915_READ(GEN6_UCGCTL1) | - GEN6_BLBUNIT_CLOCK_GATE_DISABLE | - GEN6_CSUNIT_CLOCK_GATE_DISABLE); + if (!intel_enable_rc6(dev)) + return; - /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock - * gating disable must be set. Failure to set it results in - * flickering pixels due to Z write ordering failures after - * some amount of runtime in the Mesa "fire" demo, and Unigine - * Sanctuary and Tropics, and apparently anything else with - * alpha test or pixel discard. - * - * According to the spec, bit 11 (RCCUNIT) must also be set, - * but we didn't debug actual testcases to find it out. - * - * WaDisableRCCUnitClockGating:snb - * WaDisableRCPBUnitClockGating:snb - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | - GEN6_RCCUNIT_CLOCK_GATE_DISABLE); + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); + ret = ironlake_setup_rc6(dev); + if (ret) + return; - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and - * 3DSTATE_SF number of SF output attributes is more than 16." - */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; /* - * According to the spec the following bits should be - * set in order to enable memory self-refresh and fbc: - * The bit21 and bit22 of 0x42000 - * The bit21 and bit22 of 0x42004 - * The bit5 and bit7 of 0x42020 - * The bit14 of 0x70180 - * The bit14 of 0x71180 - * - * WaFbcAsynchFlipDisableFbcQueue:snb + * GPU can automatically power down the render unit if given a page + * to save state. */ - I915_WRITE(ILK_DISPLAY_CHICKEN1, - I915_READ(ILK_DISPLAY_CHICKEN1) | - ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); - I915_WRITE(ILK_DISPLAY_CHICKEN2, - I915_READ(ILK_DISPLAY_CHICKEN2) | - ILK_DPARB_GATE | ILK_VSDPFD_FULL); - I915_WRITE(ILK_DSPCLK_GATE_D, - I915_READ(ILK_DSPCLK_GATE_D) | - ILK_DPARBUNIT_CLOCK_GATE_ENABLE | - ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - - g4x_disable_trickle_feed(dev); - - cpt_init_clock_gating(dev); - - gen6_check_mch_setup(dev); -} + ret = intel_ring_begin(ring, 6); + if (ret) { + ironlake_teardown_rc6(dev); + dev_priv->mm.interruptible = was_interruptible; + return; + } -static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) -{ - uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); + intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); + intel_ring_emit(ring, MI_SET_CONTEXT); + intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) | + MI_MM_SPACE_GTT | + MI_SAVE_EXT_STATE_EN | + MI_RESTORE_EXT_STATE_EN | + MI_RESTORE_INHIBIT); + intel_ring_emit(ring, MI_SUSPEND_FLUSH); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_FLUSH); + intel_ring_advance(ring); /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. + * Wait for the command parser to advance past MI_SET_CONTEXT. 
The HW + * does an implicit flush, combined with MI_FLUSH above, it should be + * safe to assume that renderctx is valid */ - reg &= ~GEN7_FF_SCHED_MASK; - reg |= GEN7_FF_TS_SCHED_HW; - reg |= GEN7_FF_VS_SCHED_HW; - reg |= GEN7_FF_DS_SCHED_HW; - - I915_WRITE(GEN7_FF_THREAD_MODE, reg); -} - -static void lpt_init_clock_gating(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; + ret = intel_ring_idle(ring); + dev_priv->mm.interruptible = was_interruptible; + if (ret) { + DRM_ERROR("failed to enable ironlake power savings\n"); + ironlake_teardown_rc6(dev); + return; + } - /* - * TODO: this bit should only be enabled when really needed, then - * disabled when not needed anymore in order to save power. - */ - if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) - I915_WRITE(SOUTH_DSPCLK_GATE_D, - I915_READ(SOUTH_DSPCLK_GATE_D) | - PCH_LP_PARTITION_LEVEL_DISABLE); + I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN); + I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); - /* WADPOClockGatingDisable:hsw */ - I915_WRITE(_TRANSA_CHICKEN1, - I915_READ(_TRANSA_CHICKEN1) | - TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + intel_print_rc6_info(dev, GEN6_RC_CTL_RC6_ENABLE); } -static void lpt_suspend_hw(struct drm_device *dev) +static unsigned long intel_pxfreq(u32 vidfreq) { - struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long freq; + int div = (vidfreq & 0x3f0000) >> 16; + int post = (vidfreq & 0x3000) >> 12; + int pre = (vidfreq & 0x7); - if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { - uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); + if (!pre) + return 0; - val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; - I915_WRITE(SOUTH_DSPCLK_GATE_D, val); - } -} + freq = ((div * 133333) / ((1<dev_private; - enum pipe pipe; + return freq; +} - I915_WRITE(WM3_LP_ILK, 0); - I915_WRITE(WM2_LP_ILK, 0); - I915_WRITE(WM1_LP_ILK, 0); +static const struct cparams { + u16 i; + u16 t; + u16 m; + u16 c; +} cparams[] = { + { 1, 1333, 301, 28664 }, + { 1, 1066, 294, 24460 }, + { 1, 800, 294, 25192 }, + { 0, 1333, 276, 27605 }, + { 0, 1066, 276, 27605 }, + { 0, 800, 231, 23784 }, +}; - /* FIXME(BDW): Check all the w/a, some might only apply to - * pre-production hw. */ +static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) +{ + u64 total_count, diff, ret; + u32 count1, count2, count3, m = 0, c = 0; + unsigned long now = jiffies_to_msecs(jiffies), diff1; + int i; + assert_spin_locked(&mchdev_lock); - I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); + diff1 = now - dev_priv->ips.last_time1; - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); + /* Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. 
+ */ + if (diff1 <= 10) + return dev_priv->ips.chipset_power; + count1 = I915_READ(DMIEC); + count2 = I915_READ(DDREC); + count3 = I915_READ(CSIEC); - /* WaSwitchSolVfFArbitrationPriority:bdw */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + total_count = count1 + count2 + count3; - /* WaPsrDPAMaskVBlankInSRD:bdw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); + /* FIXME: handle per-counter overflow */ + if (total_count < dev_priv->ips.last_count1) { + diff = ~0UL - dev_priv->ips.last_count1; + diff += total_count; + } else { + diff = total_count - dev_priv->ips.last_count1; + } - /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ - for_each_pipe(dev_priv, pipe) { - I915_WRITE(CHICKEN_PIPESL_1(pipe), - I915_READ(CHICKEN_PIPESL_1(pipe)) | - BDW_DPRS_MASK_VBLANK_SRD); + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == dev_priv->ips.c_m && + cparams[i].t == dev_priv->ips.r_t) { + m = cparams[i].m; + c = cparams[i].c; + break; + } } - /* WaVSRefCountFullforceMissDisable:bdw */ - /* WaDSRefCountFullforceMissDisable:bdw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); + diff = div_u64(diff, diff1); + ret = ((m * diff) + c); + ret = div_u64(ret, 10); - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); + dev_priv->ips.last_count1 = total_count; + dev_priv->ips.last_time1 = now; - /* WaDisableSDEUnitClockGating:bdw */ - I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + dev_priv->ips.chipset_power = ret; - lpt_init_clock_gating(dev); + return ret; } -static void haswell_init_clock_gating(struct drm_device *dev) +unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_device *dev = dev_priv->dev; + unsigned long val; - ilk_init_lp_watermarks(dev); + if (INTEL_INFO(dev)->gen != 5) + return 0; - /* L3 caching of data atomics doesn't work -- disable it. */ - I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); - I915_WRITE(HSW_ROW_CHICKEN3, - _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); + spin_lock_irq(&mchdev_lock); - /* This is required by WaCatErrorRejectionIssue:hsw */ - I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + val = __i915_chipset_val(dev_priv); - /* WaVSRefCountFullforceMissDisable:hsw */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); + spin_unlock_irq(&mchdev_lock); - /* WaDisable_RenderCache_OperationalFlush:hsw */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + return val; +} - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); +unsigned long i915_mch_val(struct drm_i915_private *dev_priv) +{ + unsigned long m, x, b; + u32 tsfs; - /* WaDisable4x2SubspanOptimization:hsw */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + tsfs = I915_READ(TSFS); - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
- */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT); + x = I915_READ8(TR1); - /* WaSwitchSolVfFArbitrationPriority:hsw */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + b = tsfs & TSFS_INTR_MASK; - /* WaRsPkgCStateDisplayPMReq:hsw */ - I915_WRITE(CHICKEN_PAR1_1, - I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); + return ((m * x) / 127) - b; +} - lpt_init_clock_gating(dev); +static int _pxvid_to_vd(u8 pxvid) +{ + if (pxvid == 0) + return 0; + + if (pxvid >= 8 && pxvid < 31) + pxvid = 31; + + return (pxvid + 2) * 125; } -static void ivybridge_init_clock_gating(struct drm_device *dev) +static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t snpcr; + struct drm_device *dev = dev_priv->dev; + const int vd = _pxvid_to_vd(pxvid); + const int vm = vd - 1125; - ilk_init_lp_watermarks(dev); + if (INTEL_INFO(dev)->is_mobile) + return vm > 0 ? vm : 0; - I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); + return vd; +} - /* WaDisableEarlyCull:ivb */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); +static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) +{ + u64 now, diff, diffms; + u32 count; - /* WaDisableBackToBackFlipFix:ivb */ - I915_WRITE(IVB_CHICKEN3, - CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | - CHICKEN3_DGMG_DONE_FIX_DISABLE); + assert_spin_locked(&mchdev_lock); - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(dev)) - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); + now = ktime_get_raw_ns(); + diffms = now - dev_priv->ips.last_time2; + do_div(diffms, NSEC_PER_MSEC); - /* WaDisable_RenderCache_OperationalFlush:ivb */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + /* Don't divide by 0 */ + if (!diffms) + return; - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ - I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, - GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); + count = I915_READ(GFXEC); - /* WaApplyL3ControlAndL3ChickenMode:ivb */ - I915_WRITE(GEN7_L3CNTLREG1, - GEN7_WA_FOR_GEN7_L3_CONTROL); - I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); - if (IS_IVB_GT1(dev)) - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - else { - /* must write both registers */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - I915_WRITE(GEN7_ROW_CHICKEN2_GT2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + if (count < dev_priv->ips.last_count2) { + diff = ~0UL - dev_priv->ips.last_count2; + diff += count; + } else { + diff = count - dev_priv->ips.last_count2; } - /* WaForceL3Serialization:ivb */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - ~L3SQ_URB_READ_CAM_MATCH_DISABLE); + dev_priv->ips.last_count2 = count; + dev_priv->ips.last_time2 = now; - /* - * According to the spec, bit 13 (RCZUNIT) must be set on IVB. - * This implements the WaDisableRCZUnitClockGating:ivb workaround. - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCZUNIT_CLOCK_GATE_DISABLE); + /* More magic constants... 
*/ + diff = diff * 1181; + diff = div_u64(diff, diffms * 10); + dev_priv->ips.gfx_power = diff; +} - /* This is required by WaCatErrorRejectionIssue:ivb */ - I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); +void i915_update_gfx_val(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + + if (INTEL_INFO(dev)->gen != 5) + return; + + spin_lock_irq(&mchdev_lock); + + __i915_update_gfx_val(dev_priv); + + spin_unlock_irq(&mchdev_lock); +} + +static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) +{ + unsigned long t, corr, state1, corr2, state2; + u32 pxvid, ext_v; + + assert_spin_locked(&mchdev_lock); - g4x_disable_trickle_feed(dev); + pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_freq * 4)); + pxvid = (pxvid >> 24) & 0x7f; + ext_v = pvid_to_extvid(dev_priv, pxvid); - gen7_setup_fixed_func_scheduler(dev_priv); + state1 = ext_v; - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - I915_WRITE(CACHE_MODE_0_GEN7, - _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - } + t = i915_mch_val(dev_priv); - /* WaDisable4x2SubspanOptimization:ivb */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + /* Revel in the empirically derived constants */ - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - I915_WRITE(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); + /* Correction factor in 1/100000 units */ + if (t > 80) + corr = ((t * 2349) + 135940); + else if (t >= 50) + corr = ((t * 964) + 29317); + else /* < 50 */ + corr = ((t * 301) + 1004); - snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); - snpcr &= ~GEN6_MBC_SNPCR_MASK; - snpcr |= GEN6_MBC_SNPCR_MED; - I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + corr = corr * ((150142 * state1) / 10000 - 78642); + corr /= 100000; + corr2 = (corr * dev_priv->ips.corr); - if (!HAS_PCH_NOP(dev)) - cpt_init_clock_gating(dev); + state2 = (corr2 * state1) / 10000; + state2 /= 100; /* convert to mW */ - gen6_check_mch_setup(dev); + __i915_update_gfx_val(dev_priv); + + return dev_priv->ips.gfx_power + state2; } -static void valleyview_init_clock_gating(struct drm_device *dev) +unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_device *dev = dev_priv->dev; + unsigned long val; - I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + if (INTEL_INFO(dev)->gen != 5) + return 0; - /* WaDisableEarlyCull:vlv */ - I915_WRITE(_3D_CHICKEN3, - _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + spin_lock_irq(&mchdev_lock); - /* WaDisableBackToBackFlipFix:vlv */ - I915_WRITE(IVB_CHICKEN3, - CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | - CHICKEN3_DGMG_DONE_FIX_DISABLE); + val = __i915_gfx_val(dev_priv); - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); + spin_unlock_irq(&mchdev_lock); - /* WaDisable_RenderCache_OperationalFlush:vlv */ - I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + return val; +} - /* WaForceL3Serialization:vlv */ - I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & - 
~L3SQ_URB_READ_CAM_MATCH_DISABLE); +/** + * i915_read_mch_val - return value for IPS use + * + * Calculate and return a value for the IPS driver to use when deciding whether + * we have thermal and power headroom to increase CPU or GPU power budget. + */ +unsigned long i915_read_mch_val(void) +{ + struct drm_i915_private *dev_priv; + unsigned long chipset_val, graphics_val, ret = 0; - /* WaDisableDopClockGating:vlv */ - I915_WRITE(GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) + goto out_unlock; + dev_priv = i915_mch_dev; - /* This is required by WaCatErrorRejectionIssue:vlv */ - I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, - I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | - GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + chipset_val = __i915_chipset_val(dev_priv); + graphics_val = __i915_gfx_val(dev_priv); - gen7_setup_fixed_func_scheduler(dev_priv); + ret = chipset_val + graphics_val; - /* - * According to the spec, bit 13 (RCZUNIT) must be set on IVB. - * This implements the WaDisableRCZUnitClockGating:vlv workaround. - */ - I915_WRITE(GEN6_UCGCTL2, - GEN6_RCZUNIT_CLOCK_GATE_DISABLE); +out_unlock: + spin_unlock_irq(&mchdev_lock); - /* WaDisableL3Bank2xClockGate:vlv - * Disabling L3 clock gating- MMIO 940c[25] = 1 - * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ - I915_WRITE(GEN7_UCGCTL4, - I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); + return ret; +} +EXPORT_SYMBOL_GPL(i915_read_mch_val); - I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); +/** + * i915_gpu_raise - raise GPU frequency limit + * + * Raise the limit; IPS indicates we have thermal headroom. + */ +bool i915_gpu_raise(void) +{ + struct drm_i915_private *dev_priv; + bool ret = true; - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - I915_WRITE(CACHE_MODE_1, - _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; - /* - * WaIncreaseL3CreditsForVLVB0:vlv - * This is the hardware default actually. - */ - I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); + if (dev_priv->ips.max_delay > dev_priv->ips.fmax) + dev_priv->ips.max_delay--; - /* - * WaDisableVLVClockGating_VBIIssue:vlv - * Disable clock gating on th GCFG unit to prevent a delay - * in the reporting of vblank events. - */ - I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); +out_unlock: + spin_unlock_irq(&mchdev_lock); + + return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_raise); -static void cherryview_init_clock_gating(struct drm_device *dev) +/** + * i915_gpu_lower - lower GPU frequency limit + * + * IPS indicates we're close to a thermal limit, so throttle back the GPU + * frequency maximum. 
+ */ +bool i915_gpu_lower(void) { - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv; + bool ret = true; - I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; - I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); + if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) + dev_priv->ips.max_delay++; - /* WaVSRefCountFullforceMissDisable:chv */ - /* WaDSRefCountFullforceMissDisable:chv */ - I915_WRITE(GEN7_FF_THREAD_MODE, - I915_READ(GEN7_FF_THREAD_MODE) & - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); +out_unlock: + spin_unlock_irq(&mchdev_lock); - /* WaDisableSemaphoreAndSyncFlipWait:chv */ - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); + return ret; +} +EXPORT_SYMBOL_GPL(i915_gpu_lower); - /* WaDisableCSUnitClockGating:chv */ - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | - GEN6_CSUNIT_CLOCK_GATE_DISABLE); +/** + * i915_gpu_busy - indicate GPU business to IPS + * + * Tell the IPS driver whether or not the GPU is busy. + */ +bool i915_gpu_busy(void) +{ + struct drm_i915_private *dev_priv; + struct intel_engine_cs *ring; + bool ret = false; + int i; - /* WaDisableSDEUnitClockGating:chv */ - I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) + goto out_unlock; + dev_priv = i915_mch_dev; - /* WaDisableGunitClockGating:chv (pre-production hw) */ - I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) | - GINT_DIS); + for_each_ring(ring, dev_priv, i) + ret |= !list_empty(&ring->request_list); - /* WaDisableFfDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, - _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE)); +out_unlock: + spin_unlock_irq(&mchdev_lock); - /* WaDisableDopClockGating:chv (pre-production hw) */ - I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | - GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); + return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_busy); -static void g4x_init_clock_gating(struct drm_device *dev) +/** + * i915_gpu_turbo_disable - disable graphics turbo + * + * Disable graphics turbo by resetting the max frequency and setting the + * current frequency to the default. 
+ */ +bool i915_gpu_turbo_disable(void) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate; + struct drm_i915_private *dev_priv; + bool ret = true; - I915_WRITE(RENCLK_GATE_D1, 0); - I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | - GS_UNIT_CLOCK_GATE_DISABLE | - CL_UNIT_CLOCK_GATE_DISABLE); - I915_WRITE(RAMCLK_GATE_D, 0); - dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | - OVRUNIT_CLOCK_GATE_DISABLE | - OVCUNIT_CLOCK_GATE_DISABLE; - if (IS_GM45(dev)) - dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; - I915_WRITE(DSPCLK_GATE_D, dspclk_gate); + spin_lock_irq(&mchdev_lock); + if (!i915_mch_dev) { + ret = false; + goto out_unlock; + } + dev_priv = i915_mch_dev; - /* WaDisableRenderCachePipelinedFlush */ - I915_WRITE(CACHE_MODE_0, - _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); + dev_priv->ips.max_delay = dev_priv->ips.fstart; + + if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) + ret = false; - /* WaDisable_RenderCache_OperationalFlush:g4x */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +out_unlock: + spin_unlock_irq(&mchdev_lock); - g4x_disable_trickle_feed(dev); + return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); -static void crestline_init_clock_gating(struct drm_device *dev) +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) { - struct drm_i915_private *dev_priv = dev->dev_private; - - I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); - I915_WRITE(RENCLK_GATE_D2, 0); - I915_WRITE(DSPCLK_GATE_D, 0); - I915_WRITE(RAMCLK_GATE_D, 0); - I915_WRITE16(DEUC, 0); - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); + void (*link)(void); - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } } -static void broadwater_init_clock_gating(struct drm_device *dev) +void intel_gpu_ips_init(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; + /* We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. 
*/ + spin_lock_irq(&mchdev_lock); + i915_mch_dev = dev_priv; + spin_unlock_irq(&mchdev_lock); - I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | - I965_RCC_CLOCK_GATE_DISABLE | - I965_RCPB_CLOCK_GATE_DISABLE | - I965_ISC_CLOCK_GATE_DISABLE | - I965_FBC_CLOCK_GATE_DISABLE); - I915_WRITE(RENCLK_GATE_D2, 0); - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); + ips_ping_for_i915_load(); +} - /* WaDisable_RenderCache_OperationalFlush:gen4 */ - I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); +void intel_gpu_ips_teardown(void) +{ + spin_lock_irq(&mchdev_lock); + i915_mch_dev = NULL; + spin_unlock_irq(&mchdev_lock); } -static void gen3_init_clock_gating(struct drm_device *dev) +static void intel_init_emon(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 dstate = I915_READ(D_STATE); + u32 lcfuse; + u8 pxw[16]; + int i; - dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | - DSTATE_DOT_CLOCK_GATING; - I915_WRITE(D_STATE, dstate); + /* Disable to program */ + I915_WRITE(ECR, 0); + POSTING_READ(ECR); - if (IS_PINEVIEW(dev)) - I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); + /* Program energy weights for various events */ + I915_WRITE(SDEW, 0x15040d00); + I915_WRITE(CSIEW0, 0x007f0000); + I915_WRITE(CSIEW1, 0x1e220004); + I915_WRITE(CSIEW2, 0x04000004); - /* IIR "flip pending" means done if this bit is set */ - I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); + for (i = 0; i < 5; i++) + I915_WRITE(PEW + (i * 4), 0); + for (i = 0; i < 3; i++) + I915_WRITE(DEW + (i * 4), 0); - /* interrupts should cause a wake up from C3 */ - I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); + /* Program P-state weights to account for frequency power adjustment */ + for (i = 0; i < 16; i++) { + u32 pxvidfreq = I915_READ(PXVFREQ_BASE + (i * 4)); + unsigned long freq = intel_pxfreq(pxvidfreq); + unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >> + PXVFREQ_PX_SHIFT; + unsigned long val; - /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ - I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); + val = vid * vid; + val *= (freq / 1000); + val *= 255; + val /= (127*127*900); + if (val > 0xff) + DRM_ERROR("bad pxval: %ld\n", val); + pxw[i] = val; + } + /* Render standby states get 0 weight */ + pxw[14] = 0; + pxw[15] = 0; - I915_WRITE(MI_ARB_STATE, - _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); + for (i = 0; i < 4; i++) { + u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) | + (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]); + I915_WRITE(PXW + (i * 4), val); + } + + /* Adjust magic regs to magic values (more experimental results) */ + I915_WRITE(OGW0, 0); + I915_WRITE(OGW1, 0); + I915_WRITE(EG0, 0x00007f00); + I915_WRITE(EG1, 0x0000000e); + I915_WRITE(EG2, 0x000e0000); + I915_WRITE(EG3, 0x68000300); + I915_WRITE(EG4, 0x42000000); + I915_WRITE(EG5, 0x00140031); + I915_WRITE(EG6, 0); + I915_WRITE(EG7, 0); + + for (i = 0; i < 8; i++) + I915_WRITE(PXWL + (i * 4), 0); + + /* Enable PMON + select events */ + I915_WRITE(ECR, 0x80000019); + + lcfuse = I915_READ(LCFUSE02); + + dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); } -static void i85x_init_clock_gating(struct drm_device *dev) +void intel_init_gt_powersave(struct drm_device *dev) { - struct drm_i915_private *dev_priv = dev->dev_private; - - I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); + i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6); - /* interrupts should cause a wake up from C3 */ 
- I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | - _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); + if (IS_CHERRYVIEW(dev)) + cherryview_init_gt_powersave(dev); + else if (IS_VALLEYVIEW(dev)) + valleyview_init_gt_powersave(dev); +} - I915_WRITE(MEM_MODE, - _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); +void intel_cleanup_gt_powersave(struct drm_device *dev) +{ + if (IS_CHERRYVIEW(dev)) + return; + else if (IS_VALLEYVIEW(dev)) + valleyview_cleanup_gt_powersave(dev); } -static void i830_init_clock_gating(struct drm_device *dev) +static void gen6_suspend_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); + flush_delayed_work(&dev_priv->rps.delayed_resume_work); - I915_WRITE(MEM_MODE, - _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | - _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); + /* + * TODO: disable RPS interrupts on GEN9+ too once RPS support + * is added for it. + */ + if (INTEL_INFO(dev)->gen < 9) + gen6_disable_rps_interrupts(dev); } -void intel_init_clock_gating(struct drm_device *dev) +/** + * intel_suspend_gt_powersave - suspend PM work and helper threads + * @dev: drm device + * + * We don't want to disable RC6 or other features here, we just want + * to make sure any work we've queued has finished and won't bother + * us while we're suspended. + */ +void intel_suspend_gt_powersave(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - dev_priv->display.init_clock_gating(dev); + if (INTEL_INFO(dev)->gen < 6) + return; + + gen6_suspend_rps(dev); + + /* Force GPU to min freq during suspend */ + gen6_rps_idle(dev_priv); } -void intel_suspend_hw(struct drm_device *dev) +void intel_disable_gt_powersave(struct drm_device *dev) { - if (HAS_PCH_LPT(dev)) - lpt_suspend_hw(dev); -} + struct drm_i915_private *dev_priv = dev->dev_private; -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - if ((power_well)->domains & (domain_mask)) + if (IS_IRONLAKE_M(dev)) { + ironlake_disable_drps(dev); + ironlake_disable_rc6(dev); + } else if (INTEL_INFO(dev)->gen >= 6) { + intel_suspend_gt_powersave(dev); -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - if ((power_well)->domains & (domain_mask)) + mutex_lock(&dev_priv->rps.hw_lock); + if (INTEL_INFO(dev)->gen >= 9) + gen9_disable_rps(dev); + else if (IS_CHERRYVIEW(dev)) + cherryview_disable_rps(dev); + else if (IS_VALLEYVIEW(dev)) + valleyview_disable_rps(dev); + else + gen6_disable_rps(dev); -/** - * We should only use the power well if we explicitly asked the hardware to - * enable it, so check if it's enabled and also check if we've requested it to - * be enabled. 
- */ -static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - return I915_READ(HSW_PWR_WELL_DRIVER) == - (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED); + dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); + } } -bool intel_display_power_enabled_unlocked(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static void intel_gen6_powersave_work(struct work_struct *work) { - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - bool is_enabled; - int i; + struct drm_i915_private *dev_priv = + container_of(work, struct drm_i915_private, + rps.delayed_resume_work.work); + struct drm_device *dev = dev_priv->dev; - if (dev_priv->pm.suspended) - return false; + mutex_lock(&dev_priv->rps.hw_lock); + + /* + * TODO: reset/enable RPS interrupts on GEN9+ too, once RPS support is + * added for it. + */ + if (INTEL_INFO(dev)->gen < 9) + gen6_reset_rps_interrupts(dev); + + if (IS_CHERRYVIEW(dev)) { + cherryview_enable_rps(dev); + } else if (IS_VALLEYVIEW(dev)) { + valleyview_enable_rps(dev); + } else if (INTEL_INFO(dev)->gen >= 9) { + gen9_enable_rps(dev); + } else if (IS_BROADWELL(dev)) { + gen8_enable_rps(dev); + __gen6_update_ring_freq(dev); + } else { + gen6_enable_rps(dev); + __gen6_update_ring_freq(dev); + } + dev_priv->rps.enabled = true; - power_domains = &dev_priv->power_domains; + if (INTEL_INFO(dev)->gen < 9) + gen6_enable_rps_interrupts(dev); - is_enabled = true; + mutex_unlock(&dev_priv->rps.hw_lock); - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { - if (power_well->always_on) - continue; + intel_runtime_pm_put(dev_priv); +} - if (!power_well->hw_enabled) { - is_enabled = false; - break; - } +void intel_enable_gt_powersave(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (IS_IRONLAKE_M(dev)) { + mutex_lock(&dev->struct_mutex); + ironlake_enable_drps(dev); + ironlake_enable_rc6(dev); + intel_init_emon(dev); + mutex_unlock(&dev->struct_mutex); + } else if (INTEL_INFO(dev)->gen >= 6) { + /* + * PCU communication is slow and this doesn't need to be + * done at any specific time, so do this out of our fast path + * to make resume and init faster. + * + * We depend on the HW RC6 power context save/restore + * mechanism when entering D3 through runtime PM suspend. So + * disable RPM until RPS/RC6 is properly setup. We can only + * get here via the driver load/system resume/runtime resume + * paths, so the _noresume version is enough (and in case of + * runtime resume it's necessary). + */ + if (schedule_delayed_work(&dev_priv->rps.delayed_resume_work, + round_jiffies_up_relative(HZ))) + intel_runtime_pm_get_noresume(dev_priv); } - - return is_enabled; } -bool intel_display_power_enabled(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +void intel_reset_gt_powersave(struct drm_device *dev) { - struct i915_power_domains *power_domains; - bool ret; - - power_domains = &dev_priv->power_domains; + struct drm_i915_private *dev_priv = dev->dev_private; - mutex_lock(&power_domains->lock); - ret = intel_display_power_enabled_unlocked(dev_priv, domain); - mutex_unlock(&power_domains->lock); + if (INTEL_INFO(dev)->gen < 6) + return; - return ret; + gen6_suspend_rps(dev); + dev_priv->rps.enabled = false; } -/* - * Starting with Haswell, we have a "Power Down Well" that can be turned off - * when not needed anymore. 
We have 4 registers that can request the power well - * to be enabled, and it will only be disabled if none of the registers is - * requesting it to be enabled. - */ -static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv) +static void ibx_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; + struct drm_i915_private *dev_priv = dev->dev_private; /* - * After we re-enable the power well, if we touch VGA register 0x3d5 - * we'll get unclaimed register interrupts. This stops after we write - * anything to the VGA MSR register. The vgacon module uses this - * register all the time, so if we unbind our driver and, as a - * consequence, bind vgacon, we'll get stuck in an infinite loop at - * console_unlock(). So make here we touch the VGA MSR register, making - * sure vgacon can keep working normally without triggering interrupts - * and error messages. + * On Ibex Peak and Cougar Point, we need to disable clock + * gating for the panel power sequencer or it will fail to + * start up when no ports are active. */ - vga_get_uninterruptible(dev->pdev, VGA_RSRC_LEGACY_IO); - outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); - vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); - - if (IS_BROADWELL(dev)) - gen8_irq_power_well_post_enable(dev_priv); + I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); } -static void hsw_set_power_well(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well, bool enable) +static void g4x_disable_trickle_feed(struct drm_device *dev) { - bool is_enabled, enable_requested; - uint32_t tmp; - - tmp = I915_READ(HSW_PWR_WELL_DRIVER); - is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED; - enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST; - - if (enable) { - if (!enable_requested) - I915_WRITE(HSW_PWR_WELL_DRIVER, - HSW_PWR_WELL_ENABLE_REQUEST); - - if (!is_enabled) { - DRM_DEBUG_KMS("Enabling power well\n"); - if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) & - HSW_PWR_WELL_STATE_ENABLED), 20)) - DRM_ERROR("Timeout enabling power well\n"); - } + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; - hsw_power_well_post_enable(dev_priv); - } else { - if (enable_requested) { - I915_WRITE(HSW_PWR_WELL_DRIVER, 0); - POSTING_READ(HSW_PWR_WELL_DRIVER); - DRM_DEBUG_KMS("Requesting to disable the power well\n"); - } + for_each_pipe(dev_priv, pipe) { + I915_WRITE(DSPCNTR(pipe), + I915_READ(DSPCNTR(pipe)) | + DISPPLANE_TRICKLE_FEED_DISABLE); + intel_flush_primary_plane(dev_priv, pipe); } } -static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void ilk_init_lp_watermarks(struct drm_device *dev) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); + I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. + * Don't touch WM1S_LP_EN here. + * Doing so could cause underruns. 
*/ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) - I915_WRITE(HSW_PWR_WELL_BIOS, 0); -} - -static void hsw_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - hsw_set_power_well(dev_priv, power_well, true); -} - -static void hsw_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - hsw_set_power_well(dev_priv, power_well, false); } -static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ -} - -static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - return true; -} - -static void vlv_set_power_well(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well, bool enable) +static void ironlake_init_clock_gating(struct drm_device *dev) { - enum punit_power_well power_well_id = power_well->data; - u32 mask; - u32 state; - u32 ctrl; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - mask = PUNIT_PWRGT_MASK(power_well_id); - state = enable ? PUNIT_PWRGT_PWR_ON(power_well_id) : - PUNIT_PWRGT_PWR_GATE(power_well_id); + /* + * Required for FBC + * WaFbcDisableDpfcClockGating:ilk + */ + dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | + ILK_DPFCUNIT_CLOCK_GATE_DISABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE; - mutex_lock(&dev_priv->rps.hw_lock); + I915_WRITE(PCH_3DCGDIS0, + MARIUNIT_CLOCK_GATE_DISABLE | + SVSMUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(PCH_3DCGDIS1, + VFMUNIT_CLOCK_GATE_DISABLE); -#define COND \ - ((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state) + /* + * According to the spec the following bits should be set in + * order to enable memory self-refresh + * The bit 22/21 of 0x42004 + * The bit 5 of 0x42020 + * The bit 15 of 0x45000 + */ + I915_WRITE(ILK_DISPLAY_CHICKEN2, + (I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE | ILK_VSDPFD_FULL)); + dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; + I915_WRITE(DISP_ARB_CTL, + (I915_READ(DISP_ARB_CTL) | + DISP_FBC_WM_DIS)); - if (COND) - goto out; + ilk_init_lp_watermarks(dev); - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL); - ctrl &= ~mask; - ctrl |= state; - vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl); + /* + * Based on the document from hardware guys the following bits + * should be set unconditionally in order to enable FBC. + * The bit 22 of 0x42000 + * The bit 22 of 0x42004 + * The bit 7,8,9 of 0x42020. 
+ */ + if (IS_IRONLAKE_M(dev)) { + /* WaFbcAsynchFlipDisableFbcQueue:ilk */ + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE); + } - if (wait_for(COND, 100)) - DRM_ERROR("timout setting power well state %08x (%08x)\n", - state, - vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL)); + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); -#undef COND + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); + I915_WRITE(_3D_CHICKEN2, + _3D_CHICKEN2_WM_READ_PIPELINED << 16 | + _3D_CHICKEN2_WM_READ_PIPELINED); -out: - mutex_unlock(&dev_priv->rps.hw_lock); -} + /* WaDisableRenderCachePipelinedFlush:ilk */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} + /* WaDisable_RenderCache_OperationalFlush:ilk */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); -static void vlv_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, true); -} + g4x_disable_trickle_feed(dev); -static void vlv_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, false); + ibx_init_clock_gating(dev); } -static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void cpt_init_clock_gating(struct drm_device *dev) { - int power_well_id = power_well->data; - bool enabled = false; - u32 mask; - u32 state; - u32 ctrl; - - mask = PUNIT_PWRGT_MASK(power_well_id); - ctrl = PUNIT_PWRGT_PWR_ON(power_well_id); - - mutex_lock(&dev_priv->rps.hw_lock); + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + uint32_t val; - state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask; /* - * We only ever set the power-on and power-gate states, anything - * else is unexpected. + * On Ibex Peak and Cougar Point, we need to disable clock + * gating for the panel power sequencer or it will fail to + * start up when no ports are active. */ - WARN_ON(state != PUNIT_PWRGT_PWR_ON(power_well_id) && - state != PUNIT_PWRGT_PWR_GATE(power_well_id)); - if (state == ctrl) - enabled = true; - - /* - * A transient state at this point would mean some unexpected party - * is poking at the power controls too. + I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE | + PCH_DPLUNIT_CLOCK_GATE_DISABLE | + PCH_CPUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | + DPLS_EDP_PPS_FIX_DIS); + /* The below fixes the weird display corruption, a few pixels shifted + * downward, on (only) LVDS of some HP laptops with IVY. 
*/ - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask; - WARN_ON(ctrl != state); + for_each_pipe(dev_priv, pipe) { + val = I915_READ(TRANS_CHICKEN2(pipe)); + val |= TRANS_CHICKEN2_TIMING_OVERRIDE; + val &= ~TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + if (dev_priv->vbt.fdi_rx_polarity_inverted) + val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + val &= ~TRANS_CHICKEN2_FRAME_START_DELAY_MASK; + val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_COUNTER; + val &= ~TRANS_CHICKEN2_DISABLE_DEEP_COLOR_MODESWITCH; + I915_WRITE(TRANS_CHICKEN2(pipe), val); + } + /* WADP0ClockGatingDisable */ + for_each_pipe(dev_priv, pipe) { + I915_WRITE(TRANS_CHICKEN1(pipe), + TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + } +} - mutex_unlock(&dev_priv->rps.hw_lock); +static void gen6_check_mch_setup(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t tmp; - return enabled; + tmp = I915_READ(MCH_SSKPD); + if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL) + DRM_DEBUG_KMS("Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n", + tmp); } -static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void gen6_init_clock_gating(struct drm_device *dev) { - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - vlv_set_power_well(dev_priv, power_well, true); + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); - spin_lock_irq(&dev_priv->irq_lock); - valleyview_enable_display_irqs(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_ELPIN_409_SELECT); - /* - * During driver initialization/resume we can avoid restoring the - * part of the HW/SW state that will be inited anyway explicitly. - */ - if (dev_priv->power_domains.initializing) - return; + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); - intel_hpd_init(dev_priv->dev); + /* WaDisable_RenderCache_OperationalFlush:snb */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - i915_redisable_vga_power_on(dev_priv->dev); -} + /* + * BSpec recoomends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + I915_WRITE(GEN6_GT_MODE, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); -static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + ilk_init_lp_watermarks(dev); - spin_lock_irq(&dev_priv->irq_lock); - valleyview_disable_display_irqs(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); - vlv_set_power_well(dev_priv, power_well, false); + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | + GEN6_BLBUNIT_CLOCK_GATE_DISABLE | + GEN6_CSUNIT_CLOCK_GATE_DISABLE); - vlv_power_sequencer_reset(dev_priv); -} + /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock + * gating disable must be set. 
Failure to set it results in + * flickering pixels due to Z write ordering failures after + * some amount of runtime in the Mesa "fire" demo, and Unigine + * Sanctuary and Tropics, and apparently anything else with + * alpha test or pixel discard. + * + * According to the spec, bit 11 (RCCUNIT) must also be set, + * but we didn't debug actual testcases to find it out. + * + * WaDisableRCCUnitClockGating:snb + * WaDisableRCPBUnitClockGating:snb + */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | + GEN6_RCCUNIT_CLOCK_GATE_DISABLE); -static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL)); /* - * Enable the CRI clock source so we can get at the - * display and the reference clock for VGA - * hotplug / manual detection. + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set to normal and + * 3DSTATE_SF number of SF output attributes is more than 16." */ - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ - - vlv_set_power_well(dev_priv, power_well, true); + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH)); /* - * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx - - * 6. De-assert cmn_reset/side_reset. Same as VLV X0. - * a. GUnit 0x2110 bit[0] set to 1 (def 0) - * b. The other bits such as sfr settings / modesel may all - * be set to 0. + * According to the spec the following bits should be + * set in order to enable memory self-refresh and fbc: + * The bit21 and bit22 of 0x42000 + * The bit21 and bit22 of 0x42004 + * The bit5 and bit7 of 0x42020 + * The bit14 of 0x70180 + * The bit14 of 0x71180 * - * This should only be done on init and resume from S3 with - * both PLLs disabled, or we risk losing DPIO and PLL - * synchronization. 
+ * WaFbcAsynchFlipDisableFbcQueue:snb */ - I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST); -} - -static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - enum pipe pipe; - - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); + I915_WRITE(ILK_DISPLAY_CHICKEN1, + I915_READ(ILK_DISPLAY_CHICKEN1) | + ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS); + I915_WRITE(ILK_DISPLAY_CHICKEN2, + I915_READ(ILK_DISPLAY_CHICKEN2) | + ILK_DPARB_GATE | ILK_VSDPFD_FULL); + I915_WRITE(ILK_DSPCLK_GATE_D, + I915_READ(ILK_DSPCLK_GATE_D) | + ILK_DPARBUNIT_CLOCK_GATE_ENABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - for_each_pipe(dev_priv, pipe) - assert_pll_disabled(dev_priv, pipe); + g4x_disable_trickle_feed(dev); - /* Assert common reset */ - I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) & ~DPIO_CMNRST); + cpt_init_clock_gating(dev); - vlv_set_power_well(dev_priv, power_well, false); + gen6_check_mch_setup(dev); } -static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) { - enum dpio_phy phy; - - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && - power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE); /* - * Enable the CRI clock source so we can get at the - * display and the reference clock for VGA - * hotplug / manual detection. + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. */ - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { - phy = DPIO_PHY0; - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | - DPLL_REFA_CLK_ENABLE_VLV); - I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - } else { - phy = DPIO_PHY1; - I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); - } - udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ - vlv_set_power_well(dev_priv, power_well, true); - - /* Poll for phypwrgood signal */ - if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) - DRM_ERROR("Display PHY %d is not power up\n", phy); + reg &= ~GEN7_FF_SCHED_MASK; + reg |= GEN7_FF_TS_SCHED_HW; + reg |= GEN7_FF_VS_SCHED_HW; + reg |= GEN7_FF_DS_SCHED_HW; - I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) | - PHY_COM_LANE_RESET_DEASSERT(phy)); + I915_WRITE(GEN7_FF_THREAD_MODE, reg); } -static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void lpt_init_clock_gating(struct drm_device *dev) { - enum dpio_phy phy; - - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && - power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); - - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { - phy = DPIO_PHY0; - assert_pll_disabled(dev_priv, PIPE_A); - assert_pll_disabled(dev_priv, PIPE_B); - } else { - phy = DPIO_PHY1; - assert_pll_disabled(dev_priv, PIPE_C); - } + struct drm_i915_private *dev_priv = dev->dev_private; - I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) & - ~PHY_COM_LANE_RESET_DEASSERT(phy)); + /* + * TODO: this bit should only be enabled when really needed, then + * disabled when not needed anymore in order to save power. 
+ */ + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) + I915_WRITE(SOUTH_DSPCLK_GATE_D, + I915_READ(SOUTH_DSPCLK_GATE_D) | + PCH_LP_PARTITION_LEVEL_DISABLE); - vlv_set_power_well(dev_priv, power_well, false); + /* WADPOClockGatingDisable:hsw */ + I915_WRITE(_TRANSA_CHICKEN1, + I915_READ(_TRANSA_CHICKEN1) | + TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } -static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void lpt_suspend_hw(struct drm_device *dev) { - enum pipe pipe = power_well->data; - bool enabled; - u32 state, ctrl; - - mutex_lock(&dev_priv->rps.hw_lock); - - state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); - /* - * We only ever set the power-on and power-gate states, anything - * else is unexpected. - */ - WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe)); - enabled = state == DP_SSS_PWR_ON(pipe); - - /* - * A transient state at this point would mean some unexpected party - * is poking at the power controls too. - */ - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); - WARN_ON(ctrl << 16 != state); + struct drm_i915_private *dev_priv = dev->dev_private; - mutex_unlock(&dev_priv->rps.hw_lock); + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { + uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); - return enabled; + val &= ~PCH_LP_PARTITION_LEVEL_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); + } } -static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well, - bool enable) +static void broadwell_init_clock_gating(struct drm_device *dev) { - enum pipe pipe = power_well->data; - u32 state; - u32 ctrl; + struct drm_i915_private *dev_priv = dev->dev_private; + enum pipe pipe; - state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); - mutex_lock(&dev_priv->rps.hw_lock); + /* WaSwitchSolVfFArbitrationPriority:bdw */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); -#define COND \ - ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) + /* WaPsrDPAMaskVBlankInSRD:bdw */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); - if (COND) - goto out; + /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ + for_each_pipe(dev_priv, pipe) { + I915_WRITE(CHICKEN_PIPESL_1(pipe), + I915_READ(CHICKEN_PIPESL_1(pipe)) | + BDW_DPRS_MASK_VBLANK_SRD); + } - ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - ctrl &= ~DP_SSC_MASK(pipe); - ctrl |= enable ? 
DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl); + /* WaVSRefCountFullforceMissDisable:bdw */ + /* WaDSRefCountFullforceMissDisable:bdw */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - if (wait_for(COND, 100)) - DRM_ERROR("timout setting power well state %08x (%08x)\n", - state, - vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ)); + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); -#undef COND + /* WaDisableSDEUnitClockGating:bdw */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); -out: - mutex_unlock(&dev_priv->rps.hw_lock); + lpt_init_clock_gating(dev); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void haswell_init_clock_gating(struct drm_device *dev) { - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} + struct drm_i915_private *dev_priv = dev->dev_private; -static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PIPE_A && - power_well->data != PIPE_B && - power_well->data != PIPE_C); + ilk_init_lp_watermarks(dev); - chv_set_pipe_power_well(dev_priv, power_well, true); -} + /* L3 caching of data atomics doesn't work -- disable it. */ + I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); + I915_WRITE(HSW_ROW_CHICKEN3, + _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE)); -static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->data != PIPE_A && - power_well->data != PIPE_B && - power_well->data != PIPE_C); + /* This is required by WaCatErrorRejectionIssue:hsw */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - chv_set_pipe_power_well(dev_priv, power_well, false); -} + /* WaVSRefCountFullforceMissDisable:hsw */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME); -static void check_power_well_state(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - bool enabled = power_well->ops->is_enabled(dev_priv, power_well); + /* WaDisable_RenderCache_OperationalFlush:hsw */ + I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - if (power_well->always_on || !i915.disable_power_well) { - if (!enabled) - goto mismatch; + /* enable HiZ Raw Stall Optimization */ + I915_WRITE(CACHE_MODE_0_GEN7, + _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); - return; - } + /* WaDisable4x2SubspanOptimization:hsw */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
+ */ + I915_WRITE(GEN7_GT_MODE, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - if (enabled != (power_well->count > 0)) - goto mismatch; + /* WaSwitchSolVfFArbitrationPriority:hsw */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); - return; + /* WaRsPkgCStateDisplayPMReq:hsw */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); -mismatch: - WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", - power_well->name, power_well->always_on, enabled, - power_well->count, i915.disable_power_well); + lpt_init_clock_gating(dev); } -void intel_display_power_get(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) +static void ivybridge_init_clock_gating(struct drm_device *dev) { - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - int i; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t snpcr; - intel_runtime_pm_get(dev_priv); + ilk_init_lp_watermarks(dev); - power_domains = &dev_priv->power_domains; + I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - mutex_lock(&power_domains->lock); + /* WaDisableEarlyCull:ivb */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); - for_each_power_well(i, power_well, BIT(domain), power_domains) { - if (!power_well->count++) { - DRM_DEBUG_KMS("enabling %s\n", power_well->name); - power_well->ops->enable(dev_priv, power_well); - power_well->hw_enabled = true; - } + /* WaDisableBackToBackFlipFix:ivb */ + I915_WRITE(IVB_CHICKEN3, + CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | + CHICKEN3_DGMG_DONE_FIX_DISABLE); - check_power_well_state(dev_priv, power_well); - } + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(dev)) + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); - power_domains->domain_use_count[domain]++; + /* WaDisable_RenderCache_OperationalFlush:ivb */ + I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - mutex_unlock(&power_domains->lock); -} + /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ + I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); -void intel_display_power_put(struct drm_i915_private *dev_priv, - enum intel_display_power_domain domain) -{ - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - int i; + /* WaApplyL3ControlAndL3ChickenMode:ivb */ + I915_WRITE(GEN7_L3CNTLREG1, + GEN7_WA_FOR_GEN7_L3_CONTROL); + I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, + GEN7_WA_L3_CHICKEN_MODE); + if (IS_IVB_GT1(dev)) + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + else { + /* must write both registers */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + I915_WRITE(GEN7_ROW_CHICKEN2_GT2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + } - power_domains = &dev_priv->power_domains; + /* WaForceL3Serialization:ivb */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); - mutex_lock(&power_domains->lock); + /* + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating:ivb workaround. 
+ */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCZUNIT_CLOCK_GATE_DISABLE); - WARN_ON(!power_domains->domain_use_count[domain]); - power_domains->domain_use_count[domain]--; + /* This is required by WaCatErrorRejectionIssue:ivb */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { - WARN_ON(!power_well->count); + g4x_disable_trickle_feed(dev); - if (!--power_well->count && i915.disable_power_well) { - DRM_DEBUG_KMS("disabling %s\n", power_well->name); - power_well->hw_enabled = false; - power_well->ops->disable(dev_priv, power_well); - } + gen7_setup_fixed_func_scheduler(dev_priv); - check_power_well_state(dev_priv, power_well); + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + I915_WRITE(CACHE_MODE_0_GEN7, + _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE)); } - mutex_unlock(&power_domains->lock); - - intel_runtime_pm_put(dev_priv); -} - -static struct i915_power_domains *hsw_pwr; - -/* Display audio driver power well request */ -int i915_request_power_well(void) -{ - struct drm_i915_private *dev_priv; - - if (!hsw_pwr) - return -ENODEV; + /* WaDisable4x2SubspanOptimization:ivb */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - dev_priv = container_of(hsw_pwr, struct drm_i915_private, - power_domains); - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); - return 0; -} -EXPORT_SYMBOL_GPL(i915_request_power_well); + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + I915_WRITE(GEN7_GT_MODE, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); -/* Display audio driver power well release */ -int i915_release_power_well(void) -{ - struct drm_i915_private *dev_priv; + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= GEN6_MBC_SNPCR_MED; + I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); - if (!hsw_pwr) - return -ENODEV; + if (!HAS_PCH_NOP(dev)) + cpt_init_clock_gating(dev); - dev_priv = container_of(hsw_pwr, struct drm_i915_private, - power_domains); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - return 0; + gen6_check_mch_setup(dev); } -EXPORT_SYMBOL_GPL(i915_release_power_well); -/* - * Private interface for the audio driver to get CDCLK in kHz. - * - * Caller must request power well using i915_request_power_well() prior to - * making the call. 
- */ -int i915_get_cdclk_freq(void) +static void valleyview_init_clock_gating(struct drm_device *dev) { - struct drm_i915_private *dev_priv; - - if (!hsw_pwr) - return -ENODEV; - - dev_priv = container_of(hsw_pwr, struct drm_i915_private, - power_domains); - - return intel_ddi_get_cdclk_freq(dev_priv); -} -EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); - - -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) - -#define HSW_ALWAYS_ON_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ - BIT(POWER_DOMAIN_PORT_DDI_A_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_A_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_PLLS) | \ - BIT(POWER_DOMAIN_INIT)) -#define HSW_DISPLAY_POWER_DOMAINS ( \ - (POWER_DOMAIN_MASK & ~HSW_ALWAYS_ON_POWER_DOMAINS) | \ - BIT(POWER_DOMAIN_INIT)) - -#define BDW_ALWAYS_ON_POWER_DOMAINS ( \ - HSW_ALWAYS_ON_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER)) -#define BDW_DISPLAY_POWER_DOMAINS ( \ - (POWER_DOMAIN_MASK & ~BDW_ALWAYS_ON_POWER_DOMAINS) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_ALWAYS_ON_POWER_DOMAINS BIT(POWER_DOMAIN_INIT) -#define VLV_DISPLAY_POWER_DOMAINS POWER_DOMAIN_MASK - -#define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_INIT)) - -static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, - .enable = i9xx_always_on_power_well_noop, - .disable = i9xx_always_on_power_well_noop, - .is_enabled = i9xx_always_on_power_well_enabled, -}; - -static const struct 
i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = chv_pipe_power_well_sync_hw, - .enable = chv_pipe_power_well_enable, - .disable = chv_pipe_power_well_disable, - .is_enabled = chv_pipe_power_well_enabled, -}; - -static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = chv_dpio_cmn_power_well_enable, - .disable = chv_dpio_cmn_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; - -static struct i915_power_well i9xx_always_on_power_well[] = { - { - .name = "always-on", - .always_on = 1, - .domains = POWER_DOMAIN_MASK, - .ops = &i9xx_always_on_power_well_ops, - }, -}; - -static const struct i915_power_well_ops hsw_power_well_ops = { - .sync_hw = hsw_power_well_sync_hw, - .enable = hsw_power_well_enable, - .disable = hsw_power_well_disable, - .is_enabled = hsw_power_well_enabled, -}; - -static struct i915_power_well hsw_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = HSW_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, - { - .name = "display", - .domains = HSW_DISPLAY_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - }, -}; + struct drm_i915_private *dev_priv = dev->dev_private; -static struct i915_power_well bdw_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = BDW_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, - { - .name = "display", - .domains = BDW_DISPLAY_POWER_DOMAINS, - .ops = &hsw_power_well_ops, - }, -}; + I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); -static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = vlv_display_power_well_enable, - .disable = vlv_display_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; + /* WaDisableEarlyCull:vlv */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); -static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = vlv_dpio_cmn_power_well_enable, - .disable = vlv_dpio_cmn_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; + /* WaDisableBackToBackFlipFix:vlv */ + I915_WRITE(IVB_CHICKEN3, + CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | + CHICKEN3_DGMG_DONE_FIX_DISABLE); -static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, - .enable = vlv_power_well_enable, - .disable = vlv_power_well_disable, - .is_enabled = vlv_power_well_enabled, -}; + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); -static struct i915_power_well vlv_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = VLV_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, - { - .name = "display", - .domains = VLV_DISPLAY_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DISP2D, - .ops = &vlv_display_power_well_ops, - }, - { - .name = "dpio-tx-b-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, - }, - { - .name = "dpio-tx-b-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - 
VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, - }, - { - .name = "dpio-tx-c-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, - }, - { - .name = "dpio-tx-c-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, - }, - { - .name = "dpio-common", - .domains = VLV_DPIO_CMN_BC_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_BC, - .ops = &vlv_dpio_cmn_power_well_ops, - }, -}; + /* WaDisable_RenderCache_OperationalFlush:vlv */ + I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); -static struct i915_power_well chv_power_wells[] = { - { - .name = "always-on", - .always_on = 1, - .domains = VLV_ALWAYS_ON_POWER_DOMAINS, - .ops = &i9xx_always_on_power_well_ops, - }, -#if 0 - { - .name = "display", - .domains = VLV_DISPLAY_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DISP2D, - .ops = &vlv_display_power_well_ops, - }, - { - .name = "pipe-a", - .domains = CHV_PIPE_A_POWER_DOMAINS, - .data = PIPE_A, - .ops = &chv_pipe_power_well_ops, - }, - { - .name = "pipe-b", - .domains = CHV_PIPE_B_POWER_DOMAINS, - .data = PIPE_B, - .ops = &chv_pipe_power_well_ops, - }, - { - .name = "pipe-c", - .domains = CHV_PIPE_C_POWER_DOMAINS, - .data = PIPE_C, - .ops = &chv_pipe_power_well_ops, - }, -#endif - { - .name = "dpio-common-bc", - /* - * XXX: cmnreset for one PHY seems to disturb the other. - * As a workaround keep both powered on at the same - * time for now. - */ - .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_BC, - .ops = &chv_dpio_cmn_power_well_ops, - }, - { - .name = "dpio-common-d", - /* - * XXX: cmnreset for one PHY seems to disturb the other. - * As a workaround keep both powered on at the same - * time for now. 
- */ - .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_D, - .ops = &chv_dpio_cmn_power_well_ops, - }, -#if 0 - { - .name = "dpio-tx-b-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, - }, - { - .name = "dpio-tx-b-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, - }, - { - .name = "dpio-tx-c-01", - .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, - }, - { - .name = "dpio-tx-c-23", - .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, - }, - { - .name = "dpio-tx-d-01", - .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | - CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01, - }, - { - .name = "dpio-tx-d-23", - .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | - CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23, - }, -#endif -}; + /* WaForceL3Serialization:vlv */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); -static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, - enum punit_power_well power_well_id) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; - int i; + /* WaDisableDopClockGating:vlv */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - if (power_well->data == power_well_id) - return power_well; - } + /* This is required by WaCatErrorRejectionIssue:vlv */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - return NULL; -} + gen7_setup_fixed_func_scheduler(dev_priv); -#define set_power_wells(power_domains, __power_wells) ({ \ - (power_domains)->power_wells = (__power_wells); \ - (power_domains)->power_well_count = ARRAY_SIZE(__power_wells); \ -}) + /* + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating:vlv workaround. + */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCZUNIT_CLOCK_GATE_DISABLE); -int intel_power_domains_init(struct drm_i915_private *dev_priv) -{ - struct i915_power_domains *power_domains = &dev_priv->power_domains; + /* WaDisableL3Bank2xClockGate:vlv + * Disabling L3 clock gating- MMIO 940c[25] = 1 + * Set bit 25, to disable L3_BANK_2x_CLK_GATING */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE); - mutex_init(&power_domains->lock); + I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); /* - * The enabling order will be from lower to higher indexed wells, - * the disabling order is reversed. + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization isn't listed for VLV. 
*/ - if (IS_HASWELL(dev_priv->dev)) { - set_power_wells(power_domains, hsw_power_wells); - hsw_pwr = power_domains; - } else if (IS_BROADWELL(dev_priv->dev)) { - set_power_wells(power_domains, bdw_power_wells); - hsw_pwr = power_domains; - } else if (IS_CHERRYVIEW(dev_priv->dev)) { - set_power_wells(power_domains, chv_power_wells); - } else if (IS_VALLEYVIEW(dev_priv->dev)) { - set_power_wells(power_domains, vlv_power_wells); - } else { - set_power_wells(power_domains, i9xx_always_on_power_well); - } + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); - return 0; -} + /* + * WaIncreaseL3CreditsForVLVB0:vlv + * This is the hardware default actually. + */ + I915_WRITE(GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE); -void intel_power_domains_remove(struct drm_i915_private *dev_priv) -{ - hsw_pwr = NULL; + /* + * WaDisableVLVClockGating_VBIIssue:vlv + * Disable clock gating on th GCFG unit to prevent a delay + * in the reporting of vblank events. + */ + I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); } -static void intel_power_domains_resume(struct drm_i915_private *dev_priv) +static void cherryview_init_clock_gating(struct drm_device *dev) { - struct i915_power_domains *power_domains = &dev_priv->power_domains; - struct i915_power_well *power_well; - int i; - - mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { - power_well->ops->sync_hw(dev_priv, power_well); - power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, - power_well); - } - mutex_unlock(&power_domains->lock); -} + struct drm_i915_private *dev_priv = dev->dev_private; -static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) -{ - struct i915_power_well *cmn = - lookup_power_well(dev_priv, PUNIT_POWER_WELL_DPIO_CMN_BC); - struct i915_power_well *disp2d = - lookup_power_well(dev_priv, PUNIT_POWER_WELL_DISP2D); + I915_WRITE(DSPCLK_GATE_D, VRHUNIT_CLOCK_GATE_DISABLE); - /* nothing to do if common lane is already off */ - if (!cmn->ops->is_enabled(dev_priv, cmn)) - return; + I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); - /* If the display might be already active skip this */ - if (disp2d->ops->is_enabled(dev_priv, disp2d) && - I915_READ(DPIO_CTL) & DPIO_CMNRST) - return; + /* WaVSRefCountFullforceMissDisable:chv */ + /* WaDSRefCountFullforceMissDisable:chv */ + I915_WRITE(GEN7_FF_THREAD_MODE, + I915_READ(GEN7_FF_THREAD_MODE) & + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); - DRM_DEBUG_KMS("toggling display PHY side reset\n"); + /* WaDisableSemaphoreAndSyncFlipWait:chv */ + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); - /* cmnlane needs DPLL registers */ - disp2d->ops->enable(dev_priv, disp2d); + /* WaDisableCSUnitClockGating:chv */ + I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | + GEN6_CSUNIT_CLOCK_GATE_DISABLE); - /* - * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx: - * Need to assert and de-assert PHY SB reset by gating the - * common lane power, then un-gating it. - * Simply ungating isn't enough to reset the PHY enough to get - * ports and lanes running. 
- */ - cmn->ops->disable(dev_priv, cmn); + /* WaDisableSDEUnitClockGating:chv */ + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); } -void intel_power_domains_init_hw(struct drm_i915_private *dev_priv) +static void g4x_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t dspclk_gate; - power_domains->initializing = true; + I915_WRITE(RENCLK_GATE_D1, 0); + I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE | + GS_UNIT_CLOCK_GATE_DISABLE | + CL_UNIT_CLOCK_GATE_DISABLE); + I915_WRITE(RAMCLK_GATE_D, 0); + dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE | + OVRUNIT_CLOCK_GATE_DISABLE | + OVCUNIT_CLOCK_GATE_DISABLE; + if (IS_GM45(dev)) + dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(DSPCLK_GATE_D, dspclk_gate); - if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev)) { - mutex_lock(&power_domains->lock); - vlv_cmnlane_wa(dev_priv); - mutex_unlock(&power_domains->lock); - } + /* WaDisableRenderCachePipelinedFlush */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); - /* For now, we need the power well to be always enabled. */ - intel_display_set_init_power(dev_priv, true); - intel_power_domains_resume(dev_priv); - power_domains->initializing = false; -} + /* WaDisable_RenderCache_OperationalFlush:g4x */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); -void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv) -{ - intel_runtime_pm_get(dev_priv); + g4x_disable_trickle_feed(dev); } -void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv) +static void crestline_init_clock_gating(struct drm_device *dev) { - intel_runtime_pm_put(dev_priv); + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); + I915_WRITE(RENCLK_GATE_D2, 0); + I915_WRITE(DSPCLK_GATE_D, 0); + I915_WRITE(RAMCLK_GATE_D, 0); + I915_WRITE16(DEUC, 0); + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); + + /* WaDisable_RenderCache_OperationalFlush:gen4 */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } -void intel_runtime_pm_get(struct drm_i915_private *dev_priv) +static void broadwater_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + struct drm_i915_private *dev_priv = dev->dev_private; - if (!HAS_RUNTIME_PM(dev)) - return; + I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | + I965_RCC_CLOCK_GATE_DISABLE | + I965_RCPB_CLOCK_GATE_DISABLE | + I965_ISC_CLOCK_GATE_DISABLE | + I965_FBC_CLOCK_GATE_DISABLE); + I915_WRITE(RENCLK_GATE_D2, 0); + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); - pm_runtime_get_sync(device); - WARN(dev_priv->pm.suspended, "Device still suspended.\n"); + /* WaDisable_RenderCache_OperationalFlush:gen4 */ + I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } -void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv) +static void gen3_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 dstate = I915_READ(D_STATE); - if (!HAS_RUNTIME_PM(dev)) - return; + dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | + 
DSTATE_DOT_CLOCK_GATING; + I915_WRITE(D_STATE, dstate); - WARN(dev_priv->pm.suspended, "Getting nosync-ref while suspended.\n"); - pm_runtime_get_noresume(device); -} + if (IS_PINEVIEW(dev)) + I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); -void intel_runtime_pm_put(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + /* IIR "flip pending" means done if this bit is set */ + I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); - if (!HAS_RUNTIME_PM(dev)) - return; + /* interrupts should cause a wake up from C3 */ + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_AGPBUSY_INT_EN)); - pm_runtime_mark_last_busy(device); - pm_runtime_put_autosuspend(device); + /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ + I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); + + I915_WRITE(MI_ARB_STATE, + _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); } -void intel_init_runtime_pm(struct drm_i915_private *dev_priv) +static void i85x_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; + struct drm_i915_private *dev_priv = dev->dev_private; - if (!HAS_RUNTIME_PM(dev)) - return; + I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); + + /* interrupts should cause a wake up from C3 */ + I915_WRITE(MI_STATE, _MASKED_BIT_ENABLE(MI_AGPBUSY_INT_EN) | + _MASKED_BIT_DISABLE(MI_AGPBUSY_830_MODE)); - pm_runtime_set_active(device); + I915_WRITE(MEM_MODE, + _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); +} - /* - * RPM depends on RC6 to save restore the GT HW context, so make RC6 a - * requirement. - */ - if (!intel_enable_rc6(dev)) { - DRM_INFO("RC6 disabled, disabling runtime PM support\n"); - return; - } +static void i830_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; - pm_runtime_set_autosuspend_delay(device, 10000); /* 10s */ - pm_runtime_mark_last_busy(device); - pm_runtime_use_autosuspend(device); + I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); - pm_runtime_put_autosuspend(device); + I915_WRITE(MEM_MODE, + _MASKED_BIT_ENABLE(MEM_DISPLAY_A_TRICKLE_FEED_DISABLE) | + _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); } -void intel_fini_runtime_pm(struct drm_i915_private *dev_priv) +void intel_init_clock_gating(struct drm_device *dev) { - struct drm_device *dev = dev_priv->dev; - struct device *device = &dev->pdev->dev; - - if (!HAS_RUNTIME_PM(dev)) - return; + struct drm_i915_private *dev_priv = dev->dev_private; - if (!intel_enable_rc6(dev)) - return; + dev_priv->display.init_clock_gating(dev); +} - /* Make sure we're not suspended first. 
*/ - pm_runtime_get_sync(device); - pm_runtime_disable(device); +void intel_suspend_hw(struct drm_device *dev) +{ + if (HAS_PCH_LPT(dev)) + lpt_suspend_hw(dev); } /* Set up chip specific power management-related functions */ @@ -7198,28 +6320,7 @@ void intel_init_pm(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (HAS_FBC(dev)) { - if (INTEL_INFO(dev)->gen >= 7) { - dev_priv->display.fbc_enabled = ironlake_fbc_enabled; - dev_priv->display.enable_fbc = gen7_enable_fbc; - dev_priv->display.disable_fbc = ironlake_disable_fbc; - } else if (INTEL_INFO(dev)->gen >= 5) { - dev_priv->display.fbc_enabled = ironlake_fbc_enabled; - dev_priv->display.enable_fbc = ironlake_enable_fbc; - dev_priv->display.disable_fbc = ironlake_disable_fbc; - } else if (IS_GM45(dev)) { - dev_priv->display.fbc_enabled = g4x_fbc_enabled; - dev_priv->display.enable_fbc = g4x_enable_fbc; - dev_priv->display.disable_fbc = g4x_disable_fbc; - } else { - dev_priv->display.fbc_enabled = i8xx_fbc_enabled; - dev_priv->display.enable_fbc = i8xx_enable_fbc; - dev_priv->display.disable_fbc = i8xx_disable_fbc; - - /* This value was pulled out of someone's hat */ - I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); - } - } + intel_fbc_init(dev_priv); /* For cxsr */ if (IS_PINEVIEW(dev)) @@ -7228,7 +6329,13 @@ void intel_init_pm(struct drm_device *dev) i915_ironlake_get_mem_freq(dev); /* For FIFO watermark updates */ - if (HAS_PCH_SPLIT(dev)) { + if (INTEL_INFO(dev)->gen >= 9) { + skl_setup_wm_latency(dev); + + dev_priv->display.init_clock_gating = gen9_init_clock_gating; + dev_priv->display.update_wm = skl_update_wm; + dev_priv->display.update_sprite_wm = skl_update_sprite_wm; + } else if (HAS_PCH_SPLIT(dev)) { ilk_setup_wm_latency(dev); if ((IS_GEN5(dev) && dev_priv->wm.pri_latency[1] && @@ -7309,7 +6416,7 @@ void intel_init_pm(struct drm_device *dev) } } -int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) +int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -7319,6 +6426,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) } I915_WRITE(GEN6_PCODE_DATA, *val); + I915_WRITE(GEN6_PCODE_DATA1, 0); I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, @@ -7333,7 +6441,7 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) return 0; } -int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) +int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u32 mbox, u32 val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); @@ -7356,99 +6464,66 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) return 0; } -static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) +static int vlv_gpu_freq_div(unsigned int czclk_freq) { - int div; - - /* 4 x czclk */ - switch (dev_priv->mem_freq) { - case 800: - div = 10; - break; - case 1066: - div = 12; - break; - case 1333: - div = 16; - break; + switch (czclk_freq) { + case 200: + return 10; + case 267: + return 12; + case 320: + case 333: + return 16; + case 400: + return 20; default: return -1; } +} + +static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val) +{ + int div, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4); - return DIV_ROUND_CLOSEST(dev_priv->mem_freq * (val + 6 - 0xbd), 4 * div); + div = 
vlv_gpu_freq_div(czclk_freq); + if (div < 0) + return div; + + return DIV_ROUND_CLOSEST(czclk_freq * (val + 6 - 0xbd), div); } static int byt_freq_opcode(struct drm_i915_private *dev_priv, int val) { - int mul; + int mul, czclk_freq = DIV_ROUND_CLOSEST(dev_priv->mem_freq, 4); - /* 4 x czclk */ - switch (dev_priv->mem_freq) { - case 800: - mul = 10; - break; - case 1066: - mul = 12; - break; - case 1333: - mul = 16; - break; - default: - return -1; - } + mul = vlv_gpu_freq_div(czclk_freq); + if (mul < 0) + return mul; - return DIV_ROUND_CLOSEST(4 * mul * val, dev_priv->mem_freq) + 0xbd - 6; + return DIV_ROUND_CLOSEST(mul * val, czclk_freq) + 0xbd - 6; } static int chv_gpu_freq(struct drm_i915_private *dev_priv, int val) { - int div, freq; - - switch (dev_priv->rps.cz_freq) { - case 200: - div = 5; - break; - case 267: - div = 6; - break; - case 320: - case 333: - case 400: - div = 8; - break; - default: - return -1; - } + int div, czclk_freq = dev_priv->rps.cz_freq; - freq = (DIV_ROUND_CLOSEST((dev_priv->rps.cz_freq * val), 2 * div) / 2); + div = vlv_gpu_freq_div(czclk_freq) / 2; + if (div < 0) + return div; - return freq; + return DIV_ROUND_CLOSEST(czclk_freq * val, 2 * div) / 2; } static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) { - int mul, opcode; + int mul, czclk_freq = dev_priv->rps.cz_freq; - switch (dev_priv->rps.cz_freq) { - case 200: - mul = 5; - break; - case 267: - mul = 6; - break; - case 320: - case 333: - case 400: - mul = 8; - break; - default: - return -1; - } + mul = vlv_gpu_freq_div(czclk_freq) / 2; + if (mul < 0) + return mul; /* CHV needs even values */ - opcode = (DIV_ROUND_CLOSEST((val * 2 * mul), dev_priv->rps.cz_freq) * 2); - - return opcode; + return DIV_ROUND_CLOSEST(val * 2 * mul, czclk_freq) * 2; } int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val) @@ -7485,5 +6560,4 @@ void intel_pm_setup(struct drm_device *dev) intel_gen6_powersave_work); dev_priv->pm.suspended = false; - dev_priv->pm._irqs_disabled = false; }