diff options
author | Jean-Sébastien Pédron <dumbbell@FreeBSD.org> | 2016-03-08 20:33:02 +0000 |
---|---|---|
committer | Jean-Sébastien Pédron <dumbbell@FreeBSD.org> | 2016-03-08 20:33:02 +0000 |
commit | 740be6d75549c33e73ba8c991c605c649bd92d03 (patch) | |
tree | 28a71ac77183e006bdc64bffca2daf00e060887b /sys/dev/drm2/i915/intel_pm.c | |
parent | a87488d1e468d1bc6836b50033073624eb33cbfb (diff) | |
download | src-740be6d75549c33e73ba8c991c605c649bd92d03.tar.gz src-740be6d75549c33e73ba8c991c605c649bd92d03.zip |
drm/i915: Update to match Linux 3.8.13
This update brings initial support for Haswell GPUs.
Tested by: Many users of FreeBSD, PC-BSD and HardenedBSD
Relnotes: yes
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D5554
Notes
Notes:
svn path=/head/; revision=296548
Diffstat (limited to 'sys/dev/drm2/i915/intel_pm.c')
-rw-r--r-- | sys/dev/drm2/i915/intel_pm.c | 1481 |
1 files changed, 1086 insertions, 395 deletions
diff --git a/sys/dev/drm2/i915/intel_pm.c b/sys/dev/drm2/i915/intel_pm.c index 90f513d811da..ab9eee4be5eb 100644 --- a/sys/dev/drm2/i915/intel_pm.c +++ b/sys/dev/drm2/i915/intel_pm.c @@ -29,23 +29,11 @@ __FBSDID("$FreeBSD$"); #include <dev/drm2/drmP.h> -#include <dev/drm2/drm.h> -#include <dev/drm2/i915/i915_drm.h> #include <dev/drm2/i915/i915_drv.h> #include <dev/drm2/i915/intel_drv.h> #include <sys/kdb.h> -static struct drm_i915_private *i915_mch_dev; -/* - * Lock protecting IPS related data structures - * - i915_mch_dev - * - dev_priv->max_delay - * - dev_priv->min_delay - * - dev_priv->fmax - * - dev_priv->gpu_busy - */ -static struct mtx mchdev_lock; -MTX_SYSINIT(mchdev, &mchdev_lock, "mchdev", MTX_DEF); +#define FORCEWAKE_ACK_TIMEOUT_MS 2 /* FBC, or Frame Buffer Compression, is a technique employed to compress the * framebuffer contents in-memory, aiming at reducing the required bandwidth @@ -58,6 +46,14 @@ MTX_SYSINIT(mchdev, &mchdev_lock, "mchdev", MTX_DEF); * i915.i915_enable_fbc parameter */ +static bool intel_crtc_active(struct drm_crtc *crtc) +{ + /* Be paranoid as we can arrive here with only partial + * state retrieved from the hardware during setup. + */ + return to_intel_crtc(crtc)->active && crtc->fb && crtc->mode.clock; +} + static void i8xx_disable_fbc(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -298,8 +294,6 @@ static void intel_fbc_work_fn(void *arg, int pending) static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv) { - u_int pending; - if (dev_priv->fbc_work == NULL) return; @@ -309,8 +303,8 @@ static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv) * dev_priv->fbc_work, so we can perform the cancellation * entirely asynchronously. */ - if (taskqueue_cancel_timeout(dev_priv->tq, &dev_priv->fbc_work->task, - &pending) == 0) + if (taskqueue_cancel_timeout(dev_priv->wq, &dev_priv->fbc_work->work, + NULL) == 0) /* tasklet was killed before being run, clean up */ free(dev_priv->fbc_work, DRM_MEM_KMS); @@ -333,12 +327,16 @@ void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval) intel_cancel_fbc_work(dev_priv); - work = malloc(sizeof(*work), DRM_MEM_KMS, M_WAITOK | M_ZERO); + work = malloc(sizeof *work, DRM_MEM_KMS, M_WAITOK | M_ZERO); + if (work == NULL) { + dev_priv->display.enable_fbc(crtc, interval); + return; + } work->crtc = crtc; work->fb = crtc->fb; work->interval = interval; - TIMEOUT_TASK_INIT(dev_priv->tq, &work->task, 0, intel_fbc_work_fn, + TIMEOUT_TASK_INIT(dev_priv->wq, &work->work, 0, intel_fbc_work_fn, work); dev_priv->fbc_work = work; @@ -356,7 +354,7 @@ void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval) * and indeed performing the enable as a co-routine and not * waiting synchronously upon the vblank. */ - taskqueue_enqueue_timeout(dev_priv->tq, &work->task, + taskqueue_enqueue_timeout(dev_priv->wq, &work->work, msecs_to_jiffies(50)); } @@ -402,8 +400,6 @@ void intel_update_fbc(struct drm_device *dev) struct drm_i915_gem_object *obj; int enable_fbc; - DRM_DEBUG_KMS("\n"); - if (!i915_powersave) return; @@ -420,7 +416,8 @@ void intel_update_fbc(struct drm_device *dev) * - going to an unsupported config (interlace, pixel multiply, etc.) */ list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) { - if (tmp_crtc->enabled && tmp_crtc->fb) { + if (intel_crtc_active(tmp_crtc) && + !to_intel_crtc(tmp_crtc)->primary_disabled) { if (crtc) { DRM_DEBUG_KMS("more than one pipe active, disabling compression\n"); dev_priv->no_fbc_reason = FBC_MULTIPLE_PIPES; @@ -608,7 +605,7 @@ static void i915_ironlake_get_mem_freq(struct drm_device *dev) break; } - dev_priv->r_t = dev_priv->mem_freq; + dev_priv->ips.r_t = dev_priv->mem_freq; switch (csipll & 0x3ff) { case 0x00c: @@ -640,11 +637,11 @@ static void i915_ironlake_get_mem_freq(struct drm_device *dev) } if (dev_priv->fsb_freq == 3200) { - dev_priv->c_m = 0; + dev_priv->ips.c_m = 0; } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) { - dev_priv->c_m = 1; + dev_priv->ips.c_m = 1; } else { - dev_priv->c_m = 2; + dev_priv->ips.c_m = 2; } } @@ -697,7 +694,7 @@ static const struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, if (fsb == 0 || mem == 0) return NULL; - for (i = 0; i < DRM_ARRAY_SIZE(cxsr_latency_table); i++) { + for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) { latency = &cxsr_latency_table[i]; if (is_desktop == latency->is_desktop && is_ddr3 == latency->is_ddr3 && @@ -1005,7 +1002,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) struct drm_crtc *crtc, *enabled = NULL; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (crtc->enabled && crtc->fb) { + if (intel_crtc_active(crtc)) { if (enabled) return NULL; enabled = crtc; @@ -1099,7 +1096,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (!intel_crtc_active(crtc)) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; return false; @@ -1228,7 +1225,7 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, int entries; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) + if (!intel_crtc_active(crtc)) return false; clock = crtc->mode.clock; /* VESA DOT Clock */ @@ -1299,6 +1296,7 @@ static void valleyview_update_wm(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int planea_wm, planeb_wm, cursora_wm, cursorb_wm; int plane_sr, cursor_sr; + int ignore_plane_sr, ignore_cursor_sr; unsigned int enabled = 0; vlv_update_drain_latency(dev); @@ -1315,17 +1313,23 @@ static void valleyview_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, &valleyview_wm_info, &valleyview_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &ignore_cursor_sr) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + 2*sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &ignore_plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN); - else + } else { I915_WRITE(FW_BLC_SELF_VLV, I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, @@ -1338,10 +1342,11 @@ static void valleyview_update_wm(struct drm_device *dev) (planeb_wm << DSPFW_PLANEB_SHIFT) | planea_wm); I915_WRITE(DSPFW2, - (I915_READ(DSPFW2) & DSPFW_CURSORA_MASK) | + (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | (cursora_wm << DSPFW_CURSORA_SHIFT)); I915_WRITE(DSPFW3, - (I915_READ(DSPFW3) | (cursor_sr << DSPFW_CURSOR_SR_SHIFT))); + (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | + (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); } static void g4x_update_wm(struct drm_device *dev) @@ -1364,17 +1369,18 @@ static void g4x_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, &g4x_wm_info, &g4x_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN); - else + } else { I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, @@ -1387,11 +1393,11 @@ static void g4x_update_wm(struct drm_device *dev) (planeb_wm << DSPFW_PLANEB_SHIFT) | planea_wm); I915_WRITE(DSPFW2, - (I915_READ(DSPFW2) & DSPFW_CURSORA_MASK) | + (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | (cursora_wm << DSPFW_CURSORA_SHIFT)); /* HPLL off in SR has some issues on G4x... disable it */ I915_WRITE(DSPFW3, - (I915_READ(DSPFW3) & ~DSPFW_HPLL_SR_EN) | + (I915_READ(DSPFW3) & ~(DSPFW_HPLL_SR_EN | DSPFW_CURSOR_SR_MASK)) | (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); } @@ -1480,10 +1486,13 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 0); crtc = intel_get_crtc_for_plane(dev, 0); - if (crtc->enabled && crtc->fb) { + if (intel_crtc_active(crtc)) { + int cpp = crtc->fb->bits_per_pixel / 8; + if (IS_GEN2(dev)) + cpp = 4; + planea_wm = intel_calculate_wm(crtc->mode.clock, - wm_info, fifo_size, - crtc->fb->bits_per_pixel / 8, + wm_info, fifo_size, cpp, latency_ns); enabled = crtc; } else @@ -1491,10 +1500,13 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 1); crtc = intel_get_crtc_for_plane(dev, 1); - if (crtc->enabled && crtc->fb) { + if (intel_crtc_active(crtc)) { + int cpp = crtc->fb->bits_per_pixel / 8; + if (IS_GEN2(dev)) + cpp = 4; + planeb_wm = intel_calculate_wm(crtc->mode.clock, - wm_info, fifo_size, - crtc->fb->bits_per_pixel / 8, + wm_info, fifo_size, cpp, latency_ns); if (enabled == NULL) enabled = crtc; @@ -1584,8 +1596,7 @@ static void i830_update_wm(struct drm_device *dev) planea_wm = intel_calculate_wm(crtc->mode.clock, &i830_wm_info, dev_priv->display.get_fifo_size(dev, 0), - crtc->fb->bits_per_pixel / 8, - latency_ns); + 4, latency_ns); fwater_lo = I915_READ(FW_BLC) & ~0xfff; fwater_lo |= (3<<8) | planea_wm; @@ -1818,8 +1829,110 @@ static void sandybridge_update_wm(struct drm_device *dev) enabled |= 2; } - if ((dev_priv->num_pipe == 3) && - g4x_compute_wm0(dev, 2, + /* + * Calculate and update the self-refresh watermark only when one + * display plane is used. + * + * SNB support 3 levels of watermark. + * + * WM1/WM2/WM2 watermarks have to be enabled in the ascending order, + * and disabled in the descending order + * + */ + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); + + if (!single_plane_enabled(enabled) || + dev_priv->sprite_scaling_enabled) + return; + enabled = ffs(enabled) - 1; + + /* WM1 */ + if (!ironlake_compute_srwm(dev, 1, enabled, + SNB_READ_WM1_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM1_LP_ILK, + WM1_LP_SR_EN | + (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM2 */ + if (!ironlake_compute_srwm(dev, 2, enabled, + SNB_READ_WM2_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM2_LP_ILK, + WM2_LP_EN | + (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM3 */ + if (!ironlake_compute_srwm(dev, 3, enabled, + SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM3_LP_ILK, + WM3_LP_EN | + (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); +} + +static void ivybridge_update_wm(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int latency = SNB_READ_WM0_LATENCY() * 100; /* In unit 0.1us */ + u32 val; + int fbc_wm, plane_wm, cursor_wm; + int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm; + unsigned int enabled; + + enabled = 0; + if (g4x_compute_wm0(dev, 0, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEA_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEA_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe A -" + " plane %d, " "cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 1; + } + + if (g4x_compute_wm0(dev, 1, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEB_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEB_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe B -" + " plane %d, cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 2; + } + + if (g4x_compute_wm0(dev, 2, &sandybridge_display_wm_info, latency, &sandybridge_cursor_wm_info, latency, &plane_wm, &cursor_wm)) { @@ -1882,12 +1995,17 @@ static void sandybridge_update_wm(struct drm_device *dev) (plane_wm << WM1_LP_SR_SHIFT) | cursor_wm); - /* WM3 */ + /* WM3, note we have to correct the cursor latency */ if (!ironlake_compute_srwm(dev, 3, enabled, SNB_READ_WM3_LATENCY() * 500, &sandybridge_display_srwm_info, &sandybridge_cursor_srwm_info, - &fbc_wm, &plane_wm, &cursor_wm)) + &fbc_wm, &plane_wm, &ignore_cursor_wm) || + !ironlake_compute_srwm(dev, 3, enabled, + 2 * SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm)) return; I915_WRITE(WM3_LP_ILK, @@ -1936,7 +2054,7 @@ sandybridge_compute_sprite_wm(struct drm_device *dev, int plane, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (!intel_crtc_active(crtc)) { *sprite_wm = display->guard_size; return false; } @@ -2153,7 +2271,7 @@ intel_alloc_context_page(struct drm_device *dev) return NULL; } - ret = i915_gem_object_pin(ctx, 4096, true); + ret = i915_gem_object_pin(ctx, 4096, true, false); if (ret) { DRM_ERROR("failed to pin power context: %d\n", ret); goto err_unref; @@ -2175,11 +2293,23 @@ err_unref: return NULL; } +/** + * Lock protecting IPS related data structures + */ +struct mtx mchdev_lock; +MTX_SYSINIT(mchdev, &mchdev_lock, "mchdev", MTX_DEF); + +/* Global for IPS driver to get at the current i915 device. Protected by + * mchdev_lock. */ +static struct drm_i915_private *i915_mch_dev; + bool ironlake_set_drps(struct drm_device *dev, u8 val) { struct drm_i915_private *dev_priv = dev->dev_private; u16 rgvswctl; + mtx_assert(&mchdev_lock, MA_OWNED); + rgvswctl = I915_READ16(MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { DRM_DEBUG("gpu busy, RCS change rejected\n"); @@ -2197,12 +2327,14 @@ bool ironlake_set_drps(struct drm_device *dev, u8 val) return true; } -void ironlake_enable_drps(struct drm_device *dev) +static void ironlake_enable_drps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 rgvmodectl = I915_READ(MEMMODECTL); u8 fmax, fmin, fstart, vstart; + mtx_lock(&mchdev_lock); + /* Enable temp reporting */ I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN); I915_WRITE16(TSC1, I915_READ(TSC1) | TSE); @@ -2226,12 +2358,12 @@ void ironlake_enable_drps(struct drm_device *dev) vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT; - dev_priv->fmax = fmax; /* IPS callback will increase this */ - dev_priv->fstart = fstart; + dev_priv->ips.fmax = fmax; /* IPS callback will increase this */ + dev_priv->ips.fstart = fstart; - dev_priv->max_delay = fstart; - dev_priv->min_delay = fmin; - dev_priv->cur_delay = fstart; + dev_priv->ips.max_delay = fstart; + dev_priv->ips.min_delay = fmin; + dev_priv->ips.cur_delay = fstart; DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n", fmax, fmin, fstart); @@ -2248,23 +2380,29 @@ void ironlake_enable_drps(struct drm_device *dev) rgvmodectl |= MEMMODE_SWMODE_EN; I915_WRITE(MEMMODECTL, rgvmodectl); - if (wait_for((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) + if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) DRM_ERROR("stuck trying to change perf mode\n"); - pause("915dsp", 1); + mdelay(1); ironlake_set_drps(dev, fstart); - dev_priv->last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + + dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) + I915_READ(0x112e0); - dev_priv->last_time1 = jiffies_to_msecs(jiffies); - dev_priv->last_count2 = I915_READ(0x112f4); - nanotime(&dev_priv->last_time2); + dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies); + dev_priv->ips.last_count2 = I915_READ(0x112f4); + getrawmonotonic(&dev_priv->ips.last_time2); + + mtx_unlock(&mchdev_lock); } -void ironlake_disable_drps(struct drm_device *dev) +static void ironlake_disable_drps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - u16 rgvswctl = I915_READ16(MEMSWCTL); + u16 rgvswctl; + + mtx_lock(&mchdev_lock); + + rgvswctl = I915_READ16(MEMSWCTL); /* Ack interrupts, disable EFC interrupt */ I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN); @@ -2274,27 +2412,76 @@ void ironlake_disable_drps(struct drm_device *dev) I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT); /* Go back to the starting frequency */ - ironlake_set_drps(dev, dev_priv->fstart); - pause("915dsp", 1); + ironlake_set_drps(dev, dev_priv->ips.fstart); + mdelay(1); rgvswctl |= MEMCTL_CMD_STS; I915_WRITE(MEMSWCTL, rgvswctl); - pause("915dsp", 1); + mdelay(1); + + mtx_unlock(&mchdev_lock); +} + +/* There's a funny hw issue where the hw returns all 0 when reading from + * GEN6_RP_INTERRUPT_LIMITS. Hence we always need to compute the desired value + * ourselves, instead of doing a rmw cycle (which might result in us clearing + * all limits and the gpu stuck at whatever frequency it is at atm). + */ +static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 *val) +{ + u32 limits; + + limits = 0; + + if (*val >= dev_priv->rps.max_delay) + *val = dev_priv->rps.max_delay; + limits |= dev_priv->rps.max_delay << 24; + + /* Only set the down limit when we've reached the lowest level to avoid + * getting more interrupts, otherwise leave this clear. This prevents a + * race in the hw when coming out of rc6: There's a tiny window where + * the hw runs at the minimal clock before selecting the desired + * frequency, if the down threshold expires in that window we will not + * receive a down interrupt. */ + if (*val <= dev_priv->rps.min_delay) { + *val = dev_priv->rps.min_delay; + limits |= dev_priv->rps.min_delay << 16; + } + return limits; } void gen6_set_rps(struct drm_device *dev, u8 val) { struct drm_i915_private *dev_priv = dev->dev_private; - u32 swreq; + u32 limits = gen6_rps_limits(dev_priv, &val); + + sx_assert(&dev_priv->rps.hw_lock, SA_XLOCKED); + WARN_ON(val > dev_priv->rps.max_delay); + WARN_ON(val < dev_priv->rps.min_delay); + + if (val == dev_priv->rps.cur_delay) + return; + + I915_WRITE(GEN6_RPNSWREQ, + GEN6_FREQUENCY(val) | + GEN6_OFFSET(0) | + GEN6_AGGRESSIVE_TURBO); + + /* Make sure we continue to get interrupts + * until we hit the minimum or maximum frequencies. + */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); - swreq = (val & 0x3ff) << 25; - I915_WRITE(GEN6_RPNSWREQ, swreq); + POSTING_READ(GEN6_RPNSWREQ); + + dev_priv->rps.cur_delay = val; } -void gen6_disable_rps(struct drm_device *dev) +static void gen6_disable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE(GEN6_RC_CONTROL, 0); I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); I915_WRITE(GEN6_PMIER, 0); @@ -2303,52 +2490,50 @@ void gen6_disable_rps(struct drm_device *dev) * register (PMIMR) to mask PM interrupts. The only risk is in leaving * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */ - mtx_lock(&dev_priv->rps_lock); - dev_priv->pm_iir = 0; - mtx_unlock(&dev_priv->rps_lock); + mtx_lock(&dev_priv->rps.lock); + dev_priv->rps.pm_iir = 0; + mtx_unlock(&dev_priv->rps.lock); I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR)); } int intel_enable_rc6(const struct drm_device *dev) { - /* - * Respect the kernel parameter if it is set - */ + /* Respect the kernel parameter if it is set */ if (i915_enable_rc6 >= 0) return i915_enable_rc6; - /* - * Disable RC6 on Ironlake - */ + /* Disable RC6 on Ironlake */ if (INTEL_INFO(dev)->gen == 5) return 0; - /* Sorry Haswell, no RC6 for you for now. */ - if (IS_HASWELL(dev)) - return 0; + if (IS_HASWELL(dev)) { + DRM_DEBUG_DRIVER("Haswell: only RC6 available\n"); + return INTEL_RC6_ENABLE; + } - /* - * Disable rc6 on Sandybridge - */ + /* snb/ivb have more than one rc6 state. */ if (INTEL_INFO(dev)->gen == 6) { DRM_DEBUG_DRIVER("Sandybridge: deep RC6 disabled\n"); return INTEL_RC6_ENABLE; } + DRM_DEBUG_DRIVER("RC6 and deep RC6 enabled\n"); return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE); } -void gen6_enable_rps(struct drm_i915_private *dev_priv) +static void gen6_enable_rps(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; - u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); - u32 pcu_mbox, rc6_mask = 0; + u32 rp_state_cap; + u32 gt_perf_status; + u32 rc6vids, pcu_mbox, rc6_mask = 0; u32 gtfifodbg; - int cur_freq, min_freq, max_freq; int rc6_mode; - int i; + int i, ret; + + sx_assert(&dev_priv->rps.hw_lock, SA_XLOCKED); /* Here begins a magic sequence of register writes to enable * auto-downclocking. @@ -2357,7 +2542,6 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) * userspace... */ I915_WRITE(GEN6_RC_STATE, 0); - DRM_LOCK(dev_priv->dev); /* Clear the DBG now so we don't confuse earlier errors */ if ((gtfifodbg = I915_READ(GTFIFODBG))) { @@ -2367,6 +2551,14 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) gen6_gt_force_wake_get(dev_priv); + rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); + + /* In units of 100MHz */ + dev_priv->rps.max_delay = rp_state_cap & 0xff; + dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16; + dev_priv->rps.cur_delay = 0; + /* disable the counters and set deterministic thresholds */ I915_WRITE(GEN6_RC_CONTROL, 0); @@ -2382,23 +2574,27 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC_SLEEP, 0); I915_WRITE(GEN6_RC1e_THRESHOLD, 1000); I915_WRITE(GEN6_RC6_THRESHOLD, 50000); - I915_WRITE(GEN6_RC6p_THRESHOLD, 100000); + I915_WRITE(GEN6_RC6p_THRESHOLD, 150000); I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */ + /* Check if we are enabling RC6 */ rc6_mode = intel_enable_rc6(dev_priv->dev); if (rc6_mode & INTEL_RC6_ENABLE) rc6_mask |= GEN6_RC_CTL_RC6_ENABLE; - if (rc6_mode & INTEL_RC6p_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; + /* We don't use those on Haswell */ + if (!IS_HASWELL(dev)) { + if (rc6_mode & INTEL_RC6p_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE; - if (rc6_mode & INTEL_RC6pp_ENABLE) - rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + if (rc6_mode & INTEL_RC6pp_ENABLE) + rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE; + } DRM_INFO("Enabling RC6 states: RC6 %s, RC6p %s, RC6pp %s\n", - (rc6_mode & INTEL_RC6_ENABLE) ? "on" : "off", - (rc6_mode & INTEL_RC6p_ENABLE) ? "on" : "off", - (rc6_mode & INTEL_RC6pp_ENABLE) ? "on" : "off"); + (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off", + (rc6_mask & GEN6_RC_CTL_RC6p_ENABLE) ? "on" : "off", + (rc6_mask & GEN6_RC_CTL_RC6pp_ENABLE) ? "on" : "off"); I915_WRITE(GEN6_RC_CONTROL, rc6_mask | @@ -2414,85 +2610,74 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000); I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, - 18 << 24 | - 6 << 16); - I915_WRITE(GEN6_RP_UP_THRESHOLD, 10000); - I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 1000000); - I915_WRITE(GEN6_RP_UP_EI, 100000); - I915_WRITE(GEN6_RP_DOWN_EI, 5000000); + dev_priv->rps.max_delay << 24 | + dev_priv->rps.min_delay << 16); + + I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400); + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000); + I915_WRITE(GEN6_RP_UP_EI, 66000); + I915_WRITE(GEN6_RP_DOWN_EI, 350000); + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | - GEN6_RP_MEDIA_HW_MODE | + GEN6_RP_MEDIA_HW_NORMAL_MODE | GEN6_RP_MEDIA_IS_GFX | GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG | - GEN6_RP_DOWN_IDLE_CONT); - - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to become idle\n"); - - I915_WRITE(GEN6_PCODE_DATA, 0); - I915_WRITE(GEN6_PCODE_MAILBOX, - GEN6_PCODE_READY | - GEN6_PCODE_WRITE_MIN_FREQ_TABLE); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); - - min_freq = (rp_state_cap & 0xff0000) >> 16; - max_freq = rp_state_cap & 0xff; - cur_freq = (gt_perf_status & 0xff00) >> 8; + (IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT)); - /* Check for overclock support */ - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to become idle\n"); - I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_READ_OC_PARAMS); - pcu_mbox = I915_READ(GEN6_PCODE_DATA); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, - 500)) - DRM_ERROR("timeout waiting for pcode mailbox to finish\n"); - if (pcu_mbox & (1<<31)) { /* OC supported */ - max_freq = pcu_mbox & 0xff; - DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0); + if (!ret) { + pcu_mbox = 0; + ret = sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &pcu_mbox); + if (ret && pcu_mbox & (1<<31)) { /* OC supported */ + dev_priv->rps.max_delay = pcu_mbox & 0xff; + DRM_DEBUG_DRIVER("overclocking supported, adjusting frequency max to %dMHz\n", pcu_mbox * 50); + } + } else { + DRM_DEBUG_DRIVER("Failed to set the min frequency\n"); } - /* In units of 100MHz */ - dev_priv->max_delay = max_freq; - dev_priv->min_delay = min_freq; - dev_priv->cur_delay = cur_freq; + gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8); /* requires MSI enabled */ - I915_WRITE(GEN6_PMIER, - GEN6_PM_MBOX_EVENT | - GEN6_PM_THERMAL_EVENT | - GEN6_PM_RP_DOWN_TIMEOUT | - GEN6_PM_RP_UP_THRESHOLD | - GEN6_PM_RP_DOWN_THRESHOLD | - GEN6_PM_RP_UP_EI_EXPIRED | - GEN6_PM_RP_DOWN_EI_EXPIRED); - mtx_lock(&dev_priv->rps_lock); - if (dev_priv->pm_iir != 0) - printf("KMS: pm_iir %x\n", dev_priv->pm_iir); + I915_WRITE(GEN6_PMIER, GEN6_PM_DEFERRED_EVENTS); + mtx_lock(&dev_priv->rps.lock); + WARN_ON(dev_priv->rps.pm_iir != 0); I915_WRITE(GEN6_PMIMR, 0); - mtx_unlock(&dev_priv->rps_lock); + mtx_unlock(&dev_priv->rps.lock); /* enable all PM interrupts */ I915_WRITE(GEN6_PMINTRMSK, 0); + rc6vids = 0; + ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids); + if (IS_GEN6(dev) && ret) { + DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n"); + } else if (IS_GEN6(dev) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) { + DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n", + GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450); + rc6vids &= 0xffff00; + rc6vids |= GEN6_ENCODE_RC6_VID(450); + ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_RC6VIDS, rc6vids); + if (ret) + DRM_ERROR("Couldn't fix incorrect rc6 voltage\n"); + } + gen6_gt_force_wake_put(dev_priv); - DRM_UNLOCK(dev_priv->dev); } -void gen6_update_ring_freq(struct drm_i915_private *dev_priv) +static void gen6_update_ring_freq(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; int min_freq = 15; - int gpu_freq, ia_freq, max_ia_freq; + int gpu_freq; + unsigned int ia_freq, max_ia_freq; int scaling_factor = 180; - uint64_t tsc_freq; -#if 0 + sx_assert(&dev_priv->rps.hw_lock, SA_XLOCKED); + +#ifdef FREEBSD_WIP max_ia_freq = cpufreq_quick_get_max(0); /* * Default to measured freq if none found, PCU will ensure we don't go @@ -2500,25 +2685,23 @@ void gen6_update_ring_freq(struct drm_i915_private *dev_priv) */ if (!max_ia_freq) max_ia_freq = tsc_khz; - - /* Convert from kHz to MHz */ - max_ia_freq /= 1000; #else + uint64_t tsc_freq; tsc_freq = atomic_load_acq_64(&tsc_freq); - max_ia_freq = tsc_freq / 1000 / 1000; -#endif + max_ia_freq = tsc_freq / 1000; +#endif /* FREEBSD_WIP */ - DRM_LOCK(dev_priv->dev); + /* Convert from kHz to MHz */ + max_ia_freq /= 1000; /* * For each potential GPU frequency, load a ring frequency we'd like * to use for memory access. We do this by specifying the IA frequency * the PCU should use as a reference to determine the ring frequency. */ - for (gpu_freq = dev_priv->max_delay; gpu_freq >= dev_priv->min_delay; + for (gpu_freq = dev_priv->rps.max_delay; gpu_freq >= dev_priv->rps.min_delay; gpu_freq--) { - int diff = dev_priv->max_delay - gpu_freq; - int d; + int diff = dev_priv->rps.max_delay - gpu_freq; /* * For GPU frequencies less than 750MHz, just use the lowest @@ -2528,42 +2711,33 @@ void gen6_update_ring_freq(struct drm_i915_private *dev_priv) ia_freq = 800; else ia_freq = max_ia_freq - ((diff * scaling_factor) / 2); - d = 100; - ia_freq = (ia_freq + d / 2) / d; - - I915_WRITE(GEN6_PCODE_DATA, - (ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT) | - gpu_freq); - I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | - GEN6_PCODE_WRITE_MIN_FREQ_TABLE); - if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & - GEN6_PCODE_READY) == 0, 10)) { - DRM_ERROR("pcode write of freq table timed out\n"); - continue; - } - } + ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100); + ia_freq <<= GEN6_PCODE_FREQ_IA_RATIO_SHIFT; - DRM_UNLOCK(dev_priv->dev); + sandybridge_pcode_write(dev_priv, + GEN6_PCODE_WRITE_MIN_FREQ_TABLE, + ia_freq | gpu_freq); + } } -static void ironlake_teardown_rc6(struct drm_device *dev) +void ironlake_teardown_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->renderctx) { - i915_gem_object_unpin(dev_priv->renderctx); - drm_gem_object_unreference(&dev_priv->renderctx->base); - dev_priv->renderctx = NULL; + if (dev_priv->ips.renderctx) { + i915_gem_object_unpin(dev_priv->ips.renderctx); + drm_gem_object_unreference(&dev_priv->ips.renderctx->base); + dev_priv->ips.renderctx = NULL; } - if (dev_priv->pwrctx) { - i915_gem_object_unpin(dev_priv->pwrctx); - drm_gem_object_unreference(&dev_priv->pwrctx->base); - dev_priv->pwrctx = NULL; + if (dev_priv->ips.pwrctx) { + i915_gem_object_unpin(dev_priv->ips.pwrctx); + drm_gem_object_unreference(&dev_priv->ips.pwrctx->base); + dev_priv->ips.pwrctx = NULL; } } -void ironlake_disable_rc6(struct drm_device *dev) +static void ironlake_disable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2579,22 +2753,20 @@ void ironlake_disable_rc6(struct drm_device *dev) I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); POSTING_READ(RSTDBYCTL); } - - ironlake_teardown_rc6(dev); } static int ironlake_setup_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->renderctx == NULL) - dev_priv->renderctx = intel_alloc_context_page(dev); - if (!dev_priv->renderctx) + if (dev_priv->ips.renderctx == NULL) + dev_priv->ips.renderctx = intel_alloc_context_page(dev); + if (!dev_priv->ips.renderctx) return -ENOMEM; - if (dev_priv->pwrctx == NULL) - dev_priv->pwrctx = intel_alloc_context_page(dev); - if (!dev_priv->pwrctx) { + if (dev_priv->ips.pwrctx == NULL) + dev_priv->ips.pwrctx = intel_alloc_context_page(dev); + if (!dev_priv->ips.pwrctx) { ironlake_teardown_rc6(dev); return -ENOMEM; } @@ -2602,10 +2774,11 @@ static int ironlake_setup_rc6(struct drm_device *dev) return 0; } -void ironlake_enable_rc6(struct drm_device *dev) +static void ironlake_enable_rc6(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ring_buffer *ring = &dev_priv->rings[RCS]; + struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; + bool was_interruptible; int ret; /* rc6 disabled by default due to repeated reports of hanging during @@ -2614,12 +2787,14 @@ void ironlake_enable_rc6(struct drm_device *dev) if (!intel_enable_rc6(dev)) return; - DRM_LOCK(dev); + DRM_LOCK_ASSERT(dev); + ret = ironlake_setup_rc6(dev); - if (ret) { - DRM_UNLOCK(dev); + if (ret) return; - } + + was_interruptible = dev_priv->mm.interruptible; + dev_priv->mm.interruptible = false; /* * GPU can automatically power down the render unit if given a page @@ -2628,13 +2803,13 @@ void ironlake_enable_rc6(struct drm_device *dev) ret = intel_ring_begin(ring, 6); if (ret) { ironlake_teardown_rc6(dev); - DRM_UNLOCK(dev); + dev_priv->mm.interruptible = was_interruptible; return; } intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN); intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, dev_priv->renderctx->gtt_offset | + intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset | MI_MM_SPACE_GTT | MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN | @@ -2649,17 +2824,16 @@ void ironlake_enable_rc6(struct drm_device *dev) * does an implicit flush, combined with MI_FLUSH above, it should be * safe to assume that renderctx is valid */ - ret = intel_wait_ring_idle(ring); + ret = intel_ring_idle(ring); + dev_priv->mm.interruptible = was_interruptible; if (ret) { DRM_ERROR("failed to enable ironlake power power savings\n"); ironlake_teardown_rc6(dev); - DRM_UNLOCK(dev); return; } - I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN); + I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN); I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT); - DRM_UNLOCK(dev); } static unsigned long intel_pxfreq(u32 vidfreq) @@ -2691,22 +2865,24 @@ static const struct cparams { { 0, 800, 231, 23784 }, }; -unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) +static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) { u64 total_count, diff, ret; u32 count1, count2, count3, m = 0, c = 0; unsigned long now = jiffies_to_msecs(jiffies), diff1; int i; - diff1 = now - dev_priv->last_time1; - /* - * sysctl(8) reads the value of sysctl twice in rapid - * succession. There is high chance that it happens in the - * same timer tick. Use the cached value to not divide by - * zero and give the hw a chance to gather more samples. + mtx_assert(&mchdev_lock, MA_OWNED); + + diff1 = now - dev_priv->ips.last_time1; + + /* Prevent division-by-zero if we are asking too fast. + * Also, we don't get interesting results if we are polling + * faster than once in 10ms, so just return the saved value + * in such cases. */ if (diff1 <= 10) - return dev_priv->chipset_power; + return dev_priv->ips.chipset_power; count1 = I915_READ(DMIEC); count2 = I915_READ(DDREC); @@ -2715,33 +2891,50 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) total_count = count1 + count2 + count3; /* FIXME: handle per-counter overflow */ - if (total_count < dev_priv->last_count1) { - diff = ~0UL - dev_priv->last_count1; + if (total_count < dev_priv->ips.last_count1) { + diff = ~0UL - dev_priv->ips.last_count1; diff += total_count; } else { - diff = total_count - dev_priv->last_count1; + diff = total_count - dev_priv->ips.last_count1; } - for (i = 0; i < DRM_ARRAY_SIZE(cparams); i++) { - if (cparams[i].i == dev_priv->c_m && - cparams[i].t == dev_priv->r_t) { + for (i = 0; i < ARRAY_SIZE(cparams); i++) { + if (cparams[i].i == dev_priv->ips.c_m && + cparams[i].t == dev_priv->ips.r_t) { m = cparams[i].m; c = cparams[i].c; break; } } - diff = diff / diff1; + diff = div_u64(diff, diff1); ret = ((m * diff) + c); - ret = ret / 10; + ret = div_u64(ret, 10); + + dev_priv->ips.last_count1 = total_count; + dev_priv->ips.last_time1 = now; - dev_priv->last_count1 = total_count; - dev_priv->last_time1 = now; + dev_priv->ips.chipset_power = ret; - dev_priv->chipset_power = ret; return ret; } +unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) +{ + unsigned long val; + + if (dev_priv->info->gen != 5) + return 0; + + mtx_lock(&mchdev_lock); + + val = __i915_chipset_val(dev_priv); + + mtx_unlock(&mchdev_lock); + + return val; +} + unsigned long i915_mch_val(struct drm_i915_private *dev_priv) { unsigned long m, x, b; @@ -2898,19 +3091,18 @@ static u16 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid) return v_table[pxvid].vd; } -void i915_update_gfx_val(struct drm_i915_private *dev_priv) +static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) { struct timespec now, diff1; u64 diff; unsigned long diffms; u32 count; - if (dev_priv->info->gen != 5) - return; + mtx_assert(&mchdev_lock, MA_OWNED); nanotime(&now); diff1 = now; - timespecsub(&diff1, &dev_priv->last_time2); + timespecsub(&diff1, &dev_priv->ips.last_time2); /* Don't divide by 0 */ diffms = diff1.tv_sec * 1000 + diff1.tv_nsec / 1000000; @@ -2919,28 +3111,42 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv) count = I915_READ(GFXEC); - if (count < dev_priv->last_count2) { - diff = ~0UL - dev_priv->last_count2; + if (count < dev_priv->ips.last_count2) { + diff = ~0UL - dev_priv->ips.last_count2; diff += count; } else { - diff = count - dev_priv->last_count2; + diff = count - dev_priv->ips.last_count2; } - dev_priv->last_count2 = count; - dev_priv->last_time2 = now; + dev_priv->ips.last_count2 = count; + dev_priv->ips.last_time2 = now; /* More magic constants... */ diff = diff * 1181; - diff = diff / (diffms * 10); - dev_priv->gfx_power = diff; + diff = div_u64(diff, diffms * 10); + dev_priv->ips.gfx_power = diff; } -unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) +void i915_update_gfx_val(struct drm_i915_private *dev_priv) +{ + if (dev_priv->info->gen != 5) + return; + + mtx_lock(&mchdev_lock); + + __i915_update_gfx_val(dev_priv); + + mtx_unlock(&mchdev_lock); +} + +static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) { unsigned long t, corr, state1, corr2, state2; u32 pxvid, ext_v; - pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->cur_delay * 4)); + mtx_assert(&mchdev_lock, MA_OWNED); + + pxvid = I915_READ(PXVFREQ_BASE + (dev_priv->rps.cur_delay * 4)); pxvid = (pxvid >> 24) & 0x7f; ext_v = pvid_to_extvid(dev_priv, pxvid); @@ -2960,14 +3166,30 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) corr = corr * ((150142 * state1) / 10000 - 78642); corr /= 100000; - corr2 = (corr * dev_priv->corr); + corr2 = (corr * dev_priv->ips.corr); state2 = (corr2 * state1) / 10000; state2 /= 100; /* convert to mW */ - i915_update_gfx_val(dev_priv); + __i915_update_gfx_val(dev_priv); + + return dev_priv->ips.gfx_power + state2; +} + +unsigned long i915_gfx_val(struct drm_i915_private *dev_priv) +{ + unsigned long val; + + if (dev_priv->info->gen != 5) + return 0; + + mtx_lock(&mchdev_lock); + + val = __i915_gfx_val(dev_priv); + + mtx_unlock(&mchdev_lock); - return dev_priv->gfx_power + state2; + return val; } /** @@ -2986,8 +3208,8 @@ unsigned long i915_read_mch_val(void) goto out_unlock; dev_priv = i915_mch_dev; - chipset_val = i915_chipset_val(dev_priv); - graphics_val = i915_gfx_val(dev_priv); + chipset_val = __i915_chipset_val(dev_priv); + graphics_val = __i915_gfx_val(dev_priv); ret = chipset_val + graphics_val; @@ -2996,6 +3218,7 @@ out_unlock: return ret; } +EXPORT_SYMBOL_GPL(i915_read_mch_val); /** * i915_gpu_raise - raise GPU frequency limit @@ -3014,14 +3237,15 @@ bool i915_gpu_raise(void) } dev_priv = i915_mch_dev; - if (dev_priv->max_delay > dev_priv->fmax) - dev_priv->max_delay--; + if (dev_priv->ips.max_delay > dev_priv->ips.fmax) + dev_priv->ips.max_delay--; out_unlock: mtx_unlock(&mchdev_lock); return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_raise); /** * i915_gpu_lower - lower GPU frequency limit @@ -3041,14 +3265,15 @@ bool i915_gpu_lower(void) } dev_priv = i915_mch_dev; - if (dev_priv->max_delay < dev_priv->min_delay) - dev_priv->max_delay++; + if (dev_priv->ips.max_delay < dev_priv->ips.min_delay) + dev_priv->ips.max_delay++; out_unlock: mtx_unlock(&mchdev_lock); return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_lower); /** * i915_gpu_busy - indicate GPU business to IPS @@ -3058,20 +3283,24 @@ out_unlock: bool i915_gpu_busy(void) { struct drm_i915_private *dev_priv; + struct intel_ring_buffer *ring; bool ret = false; + int i; mtx_lock(&mchdev_lock); if (!i915_mch_dev) goto out_unlock; dev_priv = i915_mch_dev; - ret = dev_priv->busy; + for_each_ring(ring, dev_priv, i) + ret |= !list_empty(&ring->request_list); out_unlock: mtx_unlock(&mchdev_lock); return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_busy); /** * i915_gpu_turbo_disable - disable graphics turbo @@ -3091,9 +3320,9 @@ bool i915_gpu_turbo_disable(void) } dev_priv = i915_mch_dev; - dev_priv->max_delay = dev_priv->fstart; + dev_priv->ips.max_delay = dev_priv->ips.fstart; - if (!ironlake_set_drps(dev_priv->dev, dev_priv->fstart)) + if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart)) ret = false; out_unlock: @@ -3101,17 +3330,41 @@ out_unlock: return ret; } +EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable); + +#ifdef FREEBSD_WIP +/** + * Tells the intel_ips driver that the i915 driver is now loaded, if + * IPS got loaded first. + * + * This awkward dance is so that neither module has to depend on the + * other in order for IPS to do the appropriate communication of + * GPU turbo limits to i915. + */ +static void +ips_ping_for_i915_load(void) +{ + void (*link)(void); + + link = symbol_get(ips_link_to_i915_driver); + if (link) { + link(); + symbol_put(ips_link_to_i915_driver); + } +} +#endif /* FREEBSD_WIP */ void intel_gpu_ips_init(struct drm_i915_private *dev_priv) { + /* We only register the i915 ips part with intel-ips once everything is + * set up, to avoid intel-ips sneaking in and reading bogus values. */ mtx_lock(&mchdev_lock); i915_mch_dev = dev_priv; - dev_priv->mchdev_lock = &mchdev_lock; mtx_unlock(&mchdev_lock); -#if 0 +#ifdef FREEBSD_WIP ips_ping_for_i915_load(); -#endif +#endif /* FREEBSD_WIP */ } void intel_gpu_ips_teardown(void) @@ -3120,8 +3373,7 @@ void intel_gpu_ips_teardown(void) i915_mch_dev = NULL; mtx_unlock(&mchdev_lock); } - -void intel_init_emon(struct drm_device *dev) +static void intel_init_emon(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; u32 lcfuse; @@ -3189,7 +3441,52 @@ void intel_init_emon(struct drm_device *dev) lcfuse = I915_READ(LCFUSE02); - dev_priv->corr = (lcfuse & LCFUSE_HIV_MASK); + dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); +} + +void intel_disable_gt_powersave(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (IS_IRONLAKE_M(dev)) { + ironlake_disable_drps(dev); + ironlake_disable_rc6(dev); + } else if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev)) { + taskqueue_cancel_timeout(dev_priv->wq, &dev_priv->rps.delayed_resume_work, NULL); + sx_xlock(&dev_priv->rps.hw_lock); + gen6_disable_rps(dev); + sx_xunlock(&dev_priv->rps.hw_lock); + } +} + +static void intel_gen6_powersave_work(void *arg, int pending) +{ + struct drm_i915_private *dev_priv = arg; + struct drm_device *dev = dev_priv->dev; + + sx_xlock(&dev_priv->rps.hw_lock); + gen6_enable_rps(dev); + gen6_update_ring_freq(dev); + sx_xunlock(&dev_priv->rps.hw_lock); +} + +void intel_enable_gt_powersave(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (IS_IRONLAKE_M(dev)) { + ironlake_enable_drps(dev); + ironlake_enable_rc6(dev); + intel_init_emon(dev); + } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) { + /* + * PCU communication is slow and this doesn't need to be + * done at any specific time, so do this out of our fast path + * to make resume and init faster. + */ + taskqueue_enqueue_timeout(dev_priv->wq, &dev_priv->rps.delayed_resume_work, + round_jiffies_up_relative(HZ)); + } } static void ibx_init_clock_gating(struct drm_device *dev) @@ -3207,14 +3504,12 @@ static void ibx_init_clock_gating(struct drm_device *dev) static void ironlake_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE; + uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; /* Required for FBC */ - dspclk_gate |= DPFCUNIT_CLOCK_GATE_DISABLE | - DPFCRUNIT_CLOCK_GATE_DISABLE | - DPFDUNIT_CLOCK_GATE_DISABLE; - /* Required for CxSR */ - dspclk_gate |= DPARBUNIT_CLOCK_GATE_DISABLE; + dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE | + ILK_DPFCUNIT_CLOCK_GATE_DISABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE; I915_WRITE(PCH_3DCGDIS0, MARIUNIT_CLOCK_GATE_DISABLE | @@ -3222,8 +3517,6 @@ static void ironlake_init_clock_gating(struct drm_device *dev) I915_WRITE(PCH_3DCGDIS1, VFMUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); - /* * According to the spec the following bits should be set in * order to enable memory self-refresh @@ -3234,9 +3527,7 @@ static void ironlake_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DISPLAY_CHICKEN2, (I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_DPARB_GATE | ILK_VSDPFD_FULL)); - I915_WRITE(ILK_DSPCLK_GATE, - (I915_READ(ILK_DSPCLK_GATE) | - ILK_DPARB_CLK_GATE)); + dspclk_gate |= ILK_DPARBUNIT_CLOCK_GATE_ENABLE; I915_WRITE(DISP_ARB_CTL, (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); @@ -3258,25 +3549,29 @@ static void ironlake_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_DPARB_GATE); - I915_WRITE(ILK_DSPCLK_GATE, - I915_READ(ILK_DSPCLK_GATE) | - ILK_DPFC_DIS1 | - ILK_DPFC_DIS2 | - ILK_CLK_FBC); } + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); + I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); I915_WRITE(_3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED << 16 | _3D_CHICKEN2_WM_READ_PIPELINED); + + /* WaDisableRenderCachePipelinedFlush */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); + + ibx_init_clock_gating(dev); } static void cpt_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int pipe; + uint32_t val; /* * On Ibex Peak and Cougar Point, we need to disable clock @@ -3286,23 +3581,43 @@ static void cpt_init_clock_gating(struct drm_device *dev) I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) | DPLS_EDP_PPS_FIX_DIS); - /* Without this, mode sets may fail silently on FDI */ - for_each_pipe(pipe) - I915_WRITE(TRANS_CHICKEN2(pipe), TRANS_AUTOTRAIN_GEN_STALL_DIS); + /* The below fixes the weird display corruption, a few pixels shifted + * downward, on (only) LVDS of some HP laptops with IVY. + */ + for_each_pipe(pipe) { + val = TRANS_CHICKEN2_TIMING_OVERRIDE; + if (dev_priv->fdi_rx_polarity_inverted) + val |= TRANS_CHICKEN2_FDI_POLARITY_REVERSED; + I915_WRITE(TRANS_CHICKEN2(pipe), val); + } + /* WADP0ClockGatingDisable */ + for_each_pipe(pipe) { + I915_WRITE(TRANS_CHICKEN1(pipe), + TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); + } } static void gen6_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int pipe; - uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE; + uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; - I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); + I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); + /* WaDisableHiZPlanesWhenMSAAEnabled */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + + /* WaSetupGtModeTdRowDispatch */ + if (IS_SNB_GT1(dev)) + I915_WRITE(GEN6_GT_MODE, + _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); + I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); @@ -3324,13 +3639,17 @@ static void gen6_init_clock_gating(struct drm_device *dev) * * According to the spec, bit 11 (RCCUNIT) must also be set, * but we didn't debug actual testcases to find it out. + * + * Also apply WaDisableVDSUnitClockGating and + * WaDisableRCPBUnitClockGating. */ I915_WRITE(GEN6_UCGCTL2, + GEN7_VDSUNIT_CLOCK_GATE_DISABLE | GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | GEN6_RCCUNIT_CLOCK_GATE_DISABLE); /* Bspec says we need to always set all mask bits. */ - I915_WRITE(_3D_CHICKEN, (0xFFFF << 16) | + I915_WRITE(_3D_CHICKEN3, (0xFFFF << 16) | _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL); /* @@ -3348,10 +3667,14 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(ILK_DISPLAY_CHICKEN2, I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_DPARB_GATE | ILK_VSDPFD_FULL); - I915_WRITE(ILK_DSPCLK_GATE, - I915_READ(ILK_DSPCLK_GATE) | - ILK_DPARB_CLK_GATE | - ILK_DPFD_CLK_GATE); + I915_WRITE(ILK_DSPCLK_GATE_D, + I915_READ(ILK_DSPCLK_GATE_D) | + ILK_DPARBUNIT_CLOCK_GATE_ENABLE | + ILK_DPFDUNIT_CLOCK_GATE_ENABLE); + + /* WaMbcDriverBootEnable */ + I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | + GEN6_MBCTL_ENABLE_BOOT_FETCH); for_each_pipe(pipe) { I915_WRITE(DSPCNTR(pipe), @@ -3359,6 +3682,13 @@ static void gen6_init_clock_gating(struct drm_device *dev) DISPPLANE_TRICKLE_FEED_DISABLE); intel_flush_display_plane(dev_priv, pipe); } + + /* The default value should be 0x200 according to docs, but the two + * platforms I checked have a 0 for this. (Maybe BIOS overrides?) */ + I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_DISABLE(0xffff)); + I915_WRITE(GEN6_GT_MODE, _MASKED_BIT_ENABLE(GEN6_GT_MODE_HI)); + + cpt_init_clock_gating(dev); } static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) @@ -3373,13 +3703,24 @@ static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_FF_THREAD_MODE, reg); } -static void ivybridge_init_clock_gating(struct drm_device *dev) +static void lpt_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; - uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE; - I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); + /* + * TODO: this bit should only be enabled when really needed, then + * disabled when not needed anymore in order to save power. + */ + if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) + I915_WRITE(SOUTH_DSPCLK_GATE_D, + I915_READ(SOUTH_DSPCLK_GATE_D) | + PCH_LP_PARTITION_LEVEL_DISABLE); +} + +static void haswell_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); @@ -3390,12 +3731,79 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) */ I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); + /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */ + I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, + GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); + + /* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */ + I915_WRITE(GEN7_L3CNTLREG1, + GEN7_WA_FOR_GEN7_L3_CONTROL); + I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, + GEN7_WA_L3_CHICKEN_MODE); + + /* This is required by WaCatErrorRejectionIssue */ + I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, + I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | + GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + + for_each_pipe(pipe) { + I915_WRITE(DSPCNTR(pipe), + I915_READ(DSPCNTR(pipe)) | + DISPPLANE_TRICKLE_FEED_DISABLE); + intel_flush_display_plane(dev_priv, pipe); + } + + gen7_setup_fixed_func_scheduler(dev_priv); + + /* WaDisable4x2SubspanOptimization */ + I915_WRITE(CACHE_MODE_1, + _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + + /* WaMbcDriverBootEnable */ + I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | + GEN6_MBCTL_ENABLE_BOOT_FETCH); + + /* XXX: This is a workaround for early silicon revisions and should be + * removed later. + */ + I915_WRITE(WM_DBG, + I915_READ(WM_DBG) | + WM_DBG_DISALLOW_MULTIPLE_LP | + WM_DBG_DISALLOW_SPRITE | + WM_DBG_DISALLOW_MAXFIFO); + + lpt_init_clock_gating(dev); +} + +static void ivybridge_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + uint32_t snpcr; + + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); + + I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); + /* WaDisableEarlyCull */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + + /* WaDisableBackToBackFlipFix */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); + /* WaDisablePSDDualDispatchEnable */ + if (IS_IVB_GT1(dev)) + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); + else + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1_GT2, + _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); + /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */ I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); @@ -3404,7 +3812,35 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, - GEN7_WA_L3_CHICKEN_MODE); + GEN7_WA_L3_CHICKEN_MODE); + if (IS_IVB_GT1(dev)) + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + else + I915_WRITE(GEN7_ROW_CHICKEN2_GT2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + + + /* WaForceL3Serialization */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock + * gating disable must be set. Failure to set it results in + * flickering pixels due to Z write ordering failures after + * some amount of runtime in the Mesa "fire" demo, and Unigine + * Sanctuary and Tropics, and apparently anything else with + * alpha test or pixel discard. + * + * According to the spec, bit 11 (RCCUNIT) must also be set, + * but we didn't debug actual testcases to find it out. + * + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating workaround. + */ + I915_WRITE(GEN6_UCGCTL2, + GEN6_RCZUNIT_CLOCK_GATE_DISABLE | + GEN6_RCCUNIT_CLOCK_GATE_DISABLE); /* This is required by WaCatErrorRejectionIssue */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, @@ -3418,49 +3854,102 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) intel_flush_display_plane(dev_priv, pipe); } + /* WaMbcDriverBootEnable */ + I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | + GEN6_MBCTL_ENABLE_BOOT_FETCH); + gen7_setup_fixed_func_scheduler(dev_priv); /* WaDisable4x2SubspanOptimization */ I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + + snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); + snpcr &= ~GEN6_MBC_SNPCR_MASK; + snpcr |= GEN6_MBC_SNPCR_MED; + I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); + + cpt_init_clock_gating(dev); } static void valleyview_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int pipe; - uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE; - - I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate); I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); - /* According to the spec, bit 13 (RCZUNIT) must be set on IVB. - * This implements the WaDisableRCZUnitClockGating workaround. - */ - I915_WRITE(GEN6_UCGCTL2, GEN6_RCZUNIT_CLOCK_GATE_DISABLE); + I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); - I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE); + /* WaDisableEarlyCull */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); + /* WaDisableBackToBackFlipFix */ I915_WRITE(IVB_CHICKEN3, CHICKEN3_DGMG_REQ_OUT_FIX_DISABLE | CHICKEN3_DGMG_DONE_FIX_DISABLE); + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE)); + /* Apply the WaDisableRHWOOptimizationForRenderHang workaround. */ I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1, GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC); /* WaApplyL3ControlAndL3ChickenMode requires those two on Ivy Bridge */ - I915_WRITE(GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL); + I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS); I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE); + /* WaForceL3Serialization */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); + + /* WaDisableDopClockGating */ + I915_WRITE(GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + + /* WaForceL3Serialization */ + I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) & + ~L3SQ_URB_READ_CAM_MATCH_DISABLE); + /* This is required by WaCatErrorRejectionIssue */ I915_WRITE(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG, I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); + /* WaMbcDriverBootEnable */ + I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) | + GEN6_MBCTL_ENABLE_BOOT_FETCH); + + + /* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock + * gating disable must be set. Failure to set it results in + * flickering pixels due to Z write ordering failures after + * some amount of runtime in the Mesa "fire" demo, and Unigine + * Sanctuary and Tropics, and apparently anything else with + * alpha test or pixel discard. + * + * According to the spec, bit 11 (RCCUNIT) must also be set, + * but we didn't debug actual testcases to find it out. + * + * According to the spec, bit 13 (RCZUNIT) must be set on IVB. + * This implements the WaDisableRCZUnitClockGating workaround. + * + * Also apply WaDisableVDSUnitClockGating and + * WaDisableRCPBUnitClockGating. + */ + I915_WRITE(GEN6_UCGCTL2, + GEN7_VDSUNIT_CLOCK_GATE_DISABLE | + GEN7_TDLUNIT_CLOCK_GATE_DISABLE | + GEN6_RCZUNIT_CLOCK_GATE_DISABLE | + GEN6_RCPBUNIT_CLOCK_GATE_DISABLE | + GEN6_RCCUNIT_CLOCK_GATE_DISABLE); + + I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE); + for_each_pipe(pipe) { I915_WRITE(DSPCNTR(pipe), I915_READ(DSPCNTR(pipe)) | @@ -3470,6 +3959,26 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_1, _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE)); + + /* + * On ValleyView, the GUnit needs to signal the GT + * when flip and other events complete. So enable + * all the GUnit->GT interrupts here + */ + I915_WRITE(VLV_DPFLIPSTAT, PIPEB_LINE_COMPARE_INT_EN | + PIPEB_HLINE_INT_EN | PIPEB_VBLANK_INT_EN | + SPRITED_FLIPDONE_INT_EN | SPRITEC_FLIPDONE_INT_EN | + PLANEB_FLIPDONE_INT_EN | PIPEA_LINE_COMPARE_INT_EN | + PIPEA_HLINE_INT_EN | PIPEA_VBLANK_INT_EN | + SPRITEB_FLIPDONE_INT_EN | SPRITEA_FLIPDONE_INT_EN | + PLANEA_FLIPDONE_INT_EN); + + /* + * WaDisableVLVClockGating_VBIIssue + * Disable clock gating on th GCFG unit to prevent a delay + * in the reporting of vblank events. + */ + I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); } static void g4x_init_clock_gating(struct drm_device *dev) @@ -3488,6 +3997,10 @@ static void g4x_init_clock_gating(struct drm_device *dev) if (IS_GM45(dev)) dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, dspclk_gate); + + /* WaDisableRenderCachePipelinedFlush */ + I915_WRITE(CACHE_MODE_0, + _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE)); } static void crestline_init_clock_gating(struct drm_device *dev) @@ -3524,6 +4037,9 @@ static void gen3_init_clock_gating(struct drm_device *dev) if (IS_PINEVIEW(dev)) I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); + + /* IIR "flip pending" means done if this bit is set */ + I915_WRITE(ECOSKPD, _MASKED_BIT_DISABLE(ECO_FLIP_DONE)); } static void i85x_init_clock_gating(struct drm_device *dev) @@ -3545,50 +4061,12 @@ void intel_init_clock_gating(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; dev_priv->display.init_clock_gating(dev); - - if (dev_priv->display.init_pch_clock_gating) - dev_priv->display.init_pch_clock_gating(dev); -} - -static void gen6_sanitize_pm(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 limits, delay, old; - - gen6_gt_force_wake_get(dev_priv); - - old = limits = I915_READ(GEN6_RP_INTERRUPT_LIMITS); - /* Make sure we continue to get interrupts - * until we hit the minimum or maximum frequencies. - */ - limits &= ~(0x3f << 16 | 0x3f << 24); - delay = dev_priv->cur_delay; - if (delay < dev_priv->max_delay) - limits |= (dev_priv->max_delay & 0x3f) << 24; - if (delay > dev_priv->min_delay) - limits |= (dev_priv->min_delay & 0x3f) << 16; - - if (old != limits) { - DRM_ERROR("Power management discrepancy: GEN6_RP_INTERRUPT_LIMITS expected %08x, was %08x\n", - limits, old); - I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, limits); - } - - gen6_gt_force_wake_put(dev_priv); -} - -void intel_sanitize_pm(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (dev_priv->display.sanitize_pm) - dev_priv->display.sanitize_pm(dev); } /* Starting with Haswell, we have different power wells for * different parts of the GPU. This attempts to enable them all. */ -static void intel_init_power_wells(struct drm_device *dev) +void intel_init_power_wells(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; unsigned long power_wells[] = { @@ -3603,19 +4081,16 @@ static void intel_init_power_wells(struct drm_device *dev) DRM_LOCK(dev); - for (i = 0; i < DRM_ARRAY_SIZE(power_wells); i++) { + for (i = 0; i < ARRAY_SIZE(power_wells); i++) { int well = I915_READ(power_wells[i]); if ((well & HSW_PWR_WELL_STATE) == 0) { I915_WRITE(power_wells[i], well & HSW_PWR_WELL_ENABLE); - if (wait_for(I915_READ(power_wells[i] & HSW_PWR_WELL_STATE), 20)) + if (wait_for((I915_READ(power_wells[i]) & HSW_PWR_WELL_STATE), 20)) DRM_ERROR("Error enabling power well %lx\n", power_wells[i]); } } -printf("XXXKIB HACK: HSW RC OFF\n"); - I915_WRITE(GEN6_RC_STATE, 0); - I915_WRITE(GEN6_RC_CONTROL, 0); DRM_UNLOCK(dev); } @@ -3649,39 +4124,6 @@ void intel_init_pm(struct drm_device *dev) /* For FIFO watermark updates */ if (HAS_PCH_SPLIT(dev)) { - dev_priv->display.force_wake_get = __gen6_gt_force_wake_get; - dev_priv->display.force_wake_put = __gen6_gt_force_wake_put; - - /* IVB configs may use multi-threaded forcewake */ - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { - u32 ecobus; - - /* A small trick here - if the bios hasn't configured MT forcewake, - * and if the device is in RC6, then force_wake_mt_get will not wake - * the device and the ECOBUS read will return zero. Which will be - * (correctly) interpreted by the test below as MT forcewake being - * disabled. - */ - DRM_LOCK(dev); - __gen6_gt_force_wake_mt_get(dev_priv); - ecobus = I915_READ_NOTRACE(ECOBUS); - __gen6_gt_force_wake_mt_put(dev_priv); - DRM_UNLOCK(dev); - - if (ecobus & FORCEWAKE_MT_ENABLE) { - DRM_DEBUG_KMS("Using MT version of forcewake\n"); - dev_priv->display.force_wake_get = - __gen6_gt_force_wake_mt_get; - dev_priv->display.force_wake_put = - __gen6_gt_force_wake_mt_put; - } - } - - if (HAS_PCH_IBX(dev)) - dev_priv->display.init_pch_clock_gating = ibx_init_clock_gating; - else if (HAS_PCH_CPT(dev)) - dev_priv->display.init_pch_clock_gating = cpt_init_clock_gating; - if (IS_GEN5(dev)) { if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK) dev_priv->display.update_wm = ironlake_update_wm; @@ -3701,11 +4143,10 @@ void intel_init_pm(struct drm_device *dev) dev_priv->display.update_wm = NULL; } dev_priv->display.init_clock_gating = gen6_init_clock_gating; - dev_priv->display.sanitize_pm = gen6_sanitize_pm; } else if (IS_IVYBRIDGE(dev)) { /* FIXME: detect B0+ stepping and use auto training */ if (SNB_READ_WM0_LATENCY()) { - dev_priv->display.update_wm = sandybridge_update_wm; + dev_priv->display.update_wm = ivybridge_update_wm; dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm; } else { DRM_DEBUG_KMS("Failed to read display plane latency. " @@ -3713,7 +4154,6 @@ void intel_init_pm(struct drm_device *dev) dev_priv->display.update_wm = NULL; } dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; - dev_priv->display.sanitize_pm = gen6_sanitize_pm; } else if (IS_HASWELL(dev)) { if (SNB_READ_WM0_LATENCY()) { dev_priv->display.update_wm = sandybridge_update_wm; @@ -3724,16 +4164,13 @@ void intel_init_pm(struct drm_device *dev) "Disable CxSR\n"); dev_priv->display.update_wm = NULL; } - dev_priv->display.init_clock_gating = ivybridge_init_clock_gating; - dev_priv->display.sanitize_pm = gen6_sanitize_pm; + dev_priv->display.init_clock_gating = haswell_init_clock_gating; } else dev_priv->display.update_wm = NULL; } else if (IS_VALLEYVIEW(dev)) { dev_priv->display.update_wm = valleyview_update_wm; dev_priv->display.init_clock_gating = valleyview_init_clock_gating; - dev_priv->display.force_wake_get = vlv_force_wake_get; - dev_priv->display.force_wake_put = vlv_force_wake_put; } else if (IS_PINEVIEW(dev)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev), dev_priv->is_ddr3, @@ -3779,10 +4216,264 @@ void intel_init_pm(struct drm_device *dev) else dev_priv->display.get_fifo_size = i830_get_fifo_size; } +} + +static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv) +{ + u32 gt_thread_status_mask; - /* We attempt to init the necessary power wells early in the initialization - * time, so the subsystems that expect power to be enabled can work. + if (IS_HASWELL(dev_priv->dev)) + gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW; + else + gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK; + + /* w/a for a sporadic read returning 0 by waiting for the GT + * thread to wake up. */ - intel_init_power_wells(dev); + if (wait_for_atomic_us((I915_READ_NOTRACE(GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500)) + DRM_ERROR("GT thread status wait timed out\n"); +} + +static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv) +{ + I915_WRITE_NOTRACE(FORCEWAKE, 0); + POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */ +} + +static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) +{ + u32 forcewake_ack; + + if (IS_HASWELL(dev_priv->dev)) + forcewake_ack = FORCEWAKE_ACK_HSW; + else + forcewake_ack = FORCEWAKE_ACK; + + if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n"); + + I915_WRITE_NOTRACE(FORCEWAKE, FORCEWAKE_KERNEL); + POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */ + + if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out waiting for forcewake to ack request.\n"); + + __gen6_gt_wait_for_thread_c0(dev_priv); +} + +static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv) +{ + I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff)); + /* something from same cacheline, but !FORCEWAKE_MT */ + POSTING_READ(ECOBUS); +} + +static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv) +{ + u32 forcewake_ack; + + if (IS_HASWELL(dev_priv->dev)) + forcewake_ack = FORCEWAKE_ACK_HSW; + else + forcewake_ack = FORCEWAKE_MT_ACK; + + if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n"); + + I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); + /* something from same cacheline, but !FORCEWAKE_MT */ + POSTING_READ(ECOBUS); + + if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & 1), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out waiting for forcewake to ack request.\n"); + + __gen6_gt_wait_for_thread_c0(dev_priv); +} + +/* + * Generally this is called implicitly by the register read function. However, + * if some sequence requires the GT to not power down then this function should + * be called at the beginning of the sequence followed by a call to + * gen6_gt_force_wake_put() at the end of the sequence. + */ +void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv) +{ + + mtx_lock(&dev_priv->gt_lock); + if (dev_priv->forcewake_count++ == 0) + dev_priv->gt.force_wake_get(dev_priv); + mtx_unlock(&dev_priv->gt_lock); +} + +void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv) +{ + u32 gtfifodbg; + gtfifodbg = I915_READ_NOTRACE(GTFIFODBG); + if (WARN(gtfifodbg & GT_FIFO_CPU_ERROR_MASK, + "MMIO read or write has been dropped %x\n", gtfifodbg)) + I915_WRITE_NOTRACE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK); } +static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) +{ + I915_WRITE_NOTRACE(FORCEWAKE, 0); + /* something from same cacheline, but !FORCEWAKE */ + POSTING_READ(ECOBUS); + gen6_gt_check_fifodbg(dev_priv); +} + +static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv) +{ + I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + /* something from same cacheline, but !FORCEWAKE_MT */ + POSTING_READ(ECOBUS); + gen6_gt_check_fifodbg(dev_priv); +} + +/* + * see gen6_gt_force_wake_get() + */ +void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv) +{ + + mtx_lock(&dev_priv->gt_lock); + if (--dev_priv->forcewake_count == 0) + dev_priv->gt.force_wake_put(dev_priv); + mtx_unlock(&dev_priv->gt_lock); +} + +int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) +{ + int ret = 0; + + if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { + int loop = 500; + u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { + udelay(10); + fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES); + } + if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES)) + ++ret; + dev_priv->gt_fifo_count = fifo; + } + dev_priv->gt_fifo_count--; + + return ret; +} + +static void vlv_force_wake_reset(struct drm_i915_private *dev_priv) +{ + I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff)); + /* something from same cacheline, but !FORCEWAKE_VLV */ + POSTING_READ(FORCEWAKE_ACK_VLV); +} + +static void vlv_force_wake_get(struct drm_i915_private *dev_priv) +{ + if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n"); + + I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL)); + + if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & 1), + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Timed out waiting for forcewake to ack request.\n"); + + __gen6_gt_wait_for_thread_c0(dev_priv); +} + +static void vlv_force_wake_put(struct drm_i915_private *dev_priv) +{ + I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL)); + /* something from same cacheline, but !FORCEWAKE_VLV */ + POSTING_READ(FORCEWAKE_ACK_VLV); + gen6_gt_check_fifodbg(dev_priv); +} + +void intel_gt_reset(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (IS_VALLEYVIEW(dev)) { + vlv_force_wake_reset(dev_priv); + } else if (INTEL_INFO(dev)->gen >= 6) { + __gen6_gt_force_wake_reset(dev_priv); + if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) + __gen6_gt_force_wake_mt_reset(dev_priv); + } +} + +void intel_gt_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + mtx_init(&dev_priv->gt_lock, "i915_gt_lock", NULL, MTX_DEF); + + intel_gt_reset(dev); + + if (IS_VALLEYVIEW(dev)) { + dev_priv->gt.force_wake_get = vlv_force_wake_get; + dev_priv->gt.force_wake_put = vlv_force_wake_put; + } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { + dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get; + dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put; + } else if (IS_GEN6(dev)) { + dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get; + dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put; + } + TIMEOUT_TASK_INIT(dev_priv->wq, &dev_priv->rps.delayed_resume_work, 0, + intel_gen6_powersave_work, dev_priv); +} + +int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val) +{ + sx_assert(&dev_priv->rps.hw_lock, SA_XLOCKED); + + if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { + DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); + return -EAGAIN; + } + + I915_WRITE(GEN6_PCODE_DATA, *val); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); + + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, + 500)) { + DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); + return -ETIMEDOUT; + } + + *val = I915_READ(GEN6_PCODE_DATA); + I915_WRITE(GEN6_PCODE_DATA, 0); + + return 0; +} + +int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val) +{ + sx_assert(&dev_priv->rps.hw_lock, SA_XLOCKED); + + if (I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { + DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); + return -EAGAIN; + } + + I915_WRITE(GEN6_PCODE_DATA, val); + I915_WRITE(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox); + + if (wait_for((I915_READ(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) == 0, + 500)) { + DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); + return -ETIMEDOUT; + } + + I915_WRITE(GEN6_PCODE_DATA, 0); + + return 0; +} |