Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / drivers / gpu / drm / i915 / intel_overlay.c
1 /*
2  * Copyright © 2009
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Daniel Vetter <daniel@ffwll.ch>
25  *
26  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
27  */
28 #include <drm/drmP.h>
29 #include <drm/i915_drm.h>
30 #include "i915_drv.h"
31 #include "i915_reg.h"
32 #include "intel_drv.h"
33
34 /* Limits for overlay size. According to intel doc, the real limits are:
35  * Y width: 4095, UV width (planar): 2047, Y height: 2047,
36  * UV height (planar): 1023. But the xorg driver assumes 2048 for height and
37  * width. Use the minimum of both.  */
38 #define IMAGE_MAX_WIDTH         2048
39 #define IMAGE_MAX_HEIGHT        2046 /* 2 * 1023 */
40 /* on 830 and 845 these large limits result in the card hanging */
41 #define IMAGE_MAX_WIDTH_LEGACY  1024
42 #define IMAGE_MAX_HEIGHT_LEGACY 1088
43
44 /* overlay register definitions */
45 /* OCMD register */
46 #define OCMD_TILED_SURFACE      (0x1<<19)
/* mirroring, bits 18:17 (OCMD_MIRROR_MODE has the same value as the mask) */
47 #define OCMD_MIRROR_MASK        (0x3<<17)
48 #define OCMD_MIRROR_MODE        (0x3<<17)
49 #define OCMD_MIRROR_HORIZONTAL  (0x1<<17)
50 #define OCMD_MIRROR_VERTICAL    (0x2<<17)
51 #define OCMD_MIRROR_BOTH        (0x3<<17)
/* packed YUV byte order, bits 15:14 */
52 #define OCMD_BYTEORDER_MASK     (0x3<<14) /* zero for YUYV or FOURCC YUY2 */
53 #define OCMD_UV_SWAP            (0x1<<14) /* YVYU */
54 #define OCMD_Y_SWAP             (0x2<<14) /* UYVY or FOURCC UYVY */
55 #define OCMD_Y_AND_UV_SWAP      (0x3<<14) /* VYUY */
/* source pixel format, bits 13:10 */
56 #define OCMD_SOURCE_FORMAT_MASK (0xf<<10)
57 #define OCMD_RGB_888            (0x1<<10) /* not in i965 Intel docs */
58 #define OCMD_RGB_555            (0x2<<10) /* not in i965 Intel docs */
59 #define OCMD_RGB_565            (0x3<<10) /* not in i965 Intel docs */
60 #define OCMD_YUV_422_PACKED     (0x8<<10)
61 #define OCMD_YUV_411_PACKED     (0x9<<10) /* not in i965 Intel docs */
62 #define OCMD_YUV_420_PLANAR     (0xc<<10)
63 #define OCMD_YUV_422_PLANAR     (0xd<<10)
64 #define OCMD_YUV_410_PLANAR     (0xe<<10) /* also 411 */
65 #define OCMD_TVSYNCFLIP_PARITY  (0x1<<9)
66 #define OCMD_TVSYNCFLIP_ENABLE  (0x1<<7)
67 #define OCMD_BUF_TYPE_MASK      (0x1<<5)
68 #define OCMD_BUF_TYPE_FRAME     (0x0<<5)
69 #define OCMD_BUF_TYPE_FIELD     (0x1<<5)
70 #define OCMD_TEST_MODE          (0x1<<4)
71 #define OCMD_BUFFER_SELECT      (0x3<<2)
72 #define OCMD_BUFFER0            (0x0<<2)
73 #define OCMD_BUFFER1            (0x1<<2)
/* NOTE(review): OCMD_FIELD_SELECT is bit 2 while OCMD_FIELD0/1 below use
 * bit 1, so this value does not cover the field bit - confirm against the
 * hardware documentation before using it as a mask. */
74 #define OCMD_FIELD_SELECT       (0x1<<2)
75 #define OCMD_FIELD0             (0x0<<1)
76 #define OCMD_FIELD1             (0x1<<1)
77 #define OCMD_ENABLE             (0x1<<0)
78
79 /* OCONFIG register */
/* pipe selection, bit 18 */
80 #define OCONF_PIPE_MASK         (0x1<<18)
81 #define OCONF_PIPE_A            (0x0<<18)
82 #define OCONF_PIPE_B            (0x1<<18)
83 #define OCONF_GAMMA2_ENABLE     (0x1<<16)
/* colour space conversion mode, bit 5 */
84 #define OCONF_CSC_MODE_BT601    (0x0<<5)
85 #define OCONF_CSC_MODE_BT709    (0x1<<5)
86 #define OCONF_CSC_BYPASS        (0x1<<4)
87 #define OCONF_CC_OUT_8BIT       (0x1<<3)
88 #define OCONF_TEST_MODE         (0x1<<2)
/* line buffer configuration, bit 0 */
89 #define OCONF_THREE_LINE_BUFFER (0x1<<0)
90 #define OCONF_TWO_LINE_BUFFER   (0x0<<0)
91
/* DCLRKM (dst-key) register */
/* Unsigned literal: shifting a signed 1 into bit 31 is undefined behaviour. */
#define DST_KEY_ENABLE          (0x1u<<31)
#define CLK_RGB24_MASK          0x0
#define CLK_RGB16_MASK          0x070307
#define CLK_RGB15_MASK          0x070707
#define CLK_RGB8I_MASK          0xffffff

/*
 * Expand a 16 bpp (5:6:5) or 15 bpp (5:5:5) colour into the 8:8:8 layout
 * written to the colour key value register: each component is moved to the
 * top bits of its byte. The argument is parenthesized so that expressions
 * such as (a | b) are masked as a whole.
 */
#define RGB16_TO_COLORKEY(c) \
	((((c) & 0xF800) << 8) | (((c) & 0x07E0) << 5) | (((c) & 0x001F) << 3))
#define RGB15_TO_COLORKEY(c) \
	((((c) & 0x7c00) << 9) | (((c) & 0x03E0) << 6) | (((c) & 0x001F) << 3))
103
104 /* overlay flip addr flag */
/* OR'ed into the flip address emitted after MI_OVERLAY_FLIP; the code in this
 * file sets it whenever the filter coefficients have to be (re)loaded. */
105 #define OFC_UPDATE              0x1
106
107 /* polyphase filter coefficients */
/* taps per phase for each coefficient table; N_PHASES entries per tap set */
108 #define N_HORIZ_Y_TAPS          5
109 #define N_VERT_Y_TAPS           3
110 #define N_HORIZ_UV_TAPS         3
111 #define N_VERT_UV_TAPS          3
112 #define N_PHASES                17
113 #define MAX_TAPS                5
114
115 /* memory buffered overlay registers */
/*
 * Layout of the overlay register block as it is written through the mapping
 * returned by intel_overlay_map_regs().  Field order and the RESERVED*
 * padding arrays fix the byte offsets noted in the trailing comments, so
 * members must not be reordered or resized.
 */
116 struct overlay_registers {
117         u32 OBUF_0Y;
118         u32 OBUF_1Y;
119         u32 OBUF_0U;
120         u32 OBUF_0V;
121         u32 OBUF_1U;
122         u32 OBUF_1V;
123         u32 OSTRIDE;
124         u32 YRGB_VPH;
125         u32 UV_VPH;
126         u32 HORZ_PH;
127         u32 INIT_PHS;
128         u32 DWINPOS;
129         u32 DWINSZ;
130         u32 SWIDTH;
131         u32 SWIDTHSW;
132         u32 SHEIGHT;
133         u32 YRGBSCALE;
134         u32 UVSCALE;
135         u32 OCLRC0;
136         u32 OCLRC1;
137         u32 DCLRKV;
138         u32 DCLRKM;
139         u32 SCLRKVH;
140         u32 SCLRKVL;
141         u32 SCLRKEN;
142         u32 OCONFIG;
143         u32 OCMD;
144         u32 RESERVED1; /* 0x6C */
145         u32 OSTART_0Y;
146         u32 OSTART_1Y;
147         u32 OSTART_0U;
148         u32 OSTART_0V;
149         u32 OSTART_1U;
150         u32 OSTART_1V;
151         u32 OTILEOFF_0Y;
152         u32 OTILEOFF_1Y;
153         u32 OTILEOFF_0U;
154         u32 OTILEOFF_0V;
155         u32 OTILEOFF_1U;
156         u32 OTILEOFF_1V;
157         u32 FASTHSCALE; /* 0xA0 */
158         u32 UVSCALEV; /* 0xA4 */
159         u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
        /* coefficient tables; each is padded out to the start of the next */
160         u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
161         u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
162         u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
163         u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
164         u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
165         u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
166         u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
167         u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
168 };
169
/* Driver-side state of the overlay plane. */
170 struct intel_overlay {
171         struct drm_device *dev;
        /* crtc the overlay is shown on; cleared when it is switched off */
172         struct intel_crtc *crtc;
        /* frame currently handed to the hardware */
173         struct drm_i915_gem_object *vid_bo;
        /* previous frame, unpinned once the pending flip has completed */
174         struct drm_i915_gem_object *old_vid_bo;
        /* set when the MI_OVERLAY_ON request is built, cleared by the off tail */
175         bool active;
176         bool pfit_active;
177         u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
178         u32 color_key:24;
179         u32 color_key_enabled:1;
180         u32 brightness, contrast, saturation;
        /* last Y scale factors written, used to detect a scale change */
181         u32 old_xscale, old_yscale;
182         /* register access */
        /* address emitted after MI_OVERLAY_FLIP (optionally | OFC_UPDATE) */
183         u32 flip_addr;
        /* object backing the register block mapped by intel_overlay_map_regs() */
184         struct drm_i915_gem_object *reg_bo;
185         /* flip handling */
        /* request of the flip still in flight, NULL when none is pending */
186         struct drm_i915_gem_request *last_flip_req;
        /* cleanup run by intel_overlay_recover_from_interrupt() after the wait */
187         void (*flip_tail)(struct intel_overlay *);
188 };
189
190 static struct overlay_registers __iomem *
191 intel_overlay_map_regs(struct intel_overlay *overlay)
192 {
193         struct drm_i915_private *dev_priv = to_i915(overlay->dev);
194         struct i915_ggtt *ggtt = &dev_priv->ggtt;
195         struct overlay_registers __iomem *regs;
196
197         if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
198                 regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
199         else
200                 regs = io_mapping_map_wc(ggtt->mappable,
201                                          i915_gem_obj_ggtt_offset(overlay->reg_bo));
202
203         return regs;
204 }
205
206 static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
207                                      struct overlay_registers __iomem *regs)
208 {
209         if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
210                 io_mapping_unmap(regs);
211 }
212
/*
 * Submit @req as the overlay's pending flip request and wait for it.
 *
 * @tail is stored in overlay->flip_tail before waiting.  If the wait fails,
 * the error is returned with last_flip_req and flip_tail left in place, so
 * intel_overlay_recover_from_interrupt() can wait again and run the tail.
 * On success only last_flip_req is cleared; this function never calls @tail
 * itself, so callers run the tail work themselves after a 0 return.
 */
213 static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
214                                          struct drm_i915_gem_request *req,
215                                          void (*tail)(struct intel_overlay *))
216 {
217         int ret;
218
        /* only one flip request may be tracked at a time */
219         WARN_ON(overlay->last_flip_req);
220         i915_gem_request_assign(&overlay->last_flip_req, req);
221         i915_add_request(req);
222
223         overlay->flip_tail = tail;
224         ret = i915_wait_request(overlay->last_flip_req);
225         if (ret)
226                 return ret;
227
228         i915_gem_request_assign(&overlay->last_flip_req, NULL);
229         return 0;
230 }
231
232 /* overlay needs to be disabled in OCMD reg */
/*
 * Build and submit a request that turns the overlay on, then wait for it.
 *
 * The request carries MI_OVERLAY_FLIP | MI_OVERLAY_ON with the flip address
 * (OFC_UPDATE set) followed by a wait for the overlay flip event.  Returns 0
 * on success or a negative error code from request allocation, ring space
 * reservation or the wait.
 */
233 static int intel_overlay_on(struct intel_overlay *overlay)
234 {
235         struct drm_device *dev = overlay->dev;
236         struct drm_i915_private *dev_priv = dev->dev_private;
237         struct intel_engine_cs *engine = &dev_priv->engine[RCS];
238         struct drm_i915_gem_request *req;
239         int ret;
240
241         WARN_ON(overlay->active);
242         WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
243
244         req = i915_gem_request_alloc(engine, NULL);
245         if (IS_ERR(req))
246                 return PTR_ERR(req);
247
248         ret = intel_ring_begin(req, 4);
249         if (ret) {
                /* the allocated request is still submitted, just without commands */
250                 i915_add_request_no_flush(req);
251                 return ret;
252         }
253
        /* marked active before the wait; a failed wait leaves it set */
254         overlay->active = true;
255
        /* four dwords, matching the intel_ring_begin() count above */
256         intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
257         intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE);
258         intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
259         intel_ring_emit(engine, MI_NOOP);
260         intel_ring_advance(engine);
261
262         return intel_overlay_do_wait_request(overlay, req, NULL);
263 }
264
265 /* overlay needs to be enabled in OCMD reg */
/*
 * Queue a flip to the current register contents without waiting for it.
 *
 * @load_polyphase_filter: when true, OFC_UPDATE is set in the flip address.
 *
 * The submitted request is remembered in overlay->last_flip_req and is not
 * waited on here.  Returns 0 on success or a negative error code from
 * request allocation or ring space reservation.
 */
266 static int intel_overlay_continue(struct intel_overlay *overlay,
267                                   bool load_polyphase_filter)
268 {
269         struct drm_device *dev = overlay->dev;
270         struct drm_i915_private *dev_priv = dev->dev_private;
271         struct intel_engine_cs *engine = &dev_priv->engine[RCS];
272         struct drm_i915_gem_request *req;
273         u32 flip_addr = overlay->flip_addr;
274         u32 tmp;
275         int ret;
276
277         WARN_ON(!overlay->active);
278
279         if (load_polyphase_filter)
280                 flip_addr |= OFC_UPDATE;
281
282         /* check for underruns */
        /* only logged; an underrun does not abort the flip */
283         tmp = I915_READ(DOVSTA);
284         if (tmp & (1 << 17))
285                 DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
286
287         req = i915_gem_request_alloc(engine, NULL);
288         if (IS_ERR(req))
289                 return PTR_ERR(req);
290
291         ret = intel_ring_begin(req, 2);
292         if (ret) {
293                 i915_add_request_no_flush(req);
294                 return ret;
295         }
296
297         intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
298         intel_ring_emit(engine, flip_addr);
299         intel_ring_advance(engine);
300
        /* track the request so a later call can wait for this flip */
301         WARN_ON(overlay->last_flip_req);
302         i915_gem_request_assign(&overlay->last_flip_req, req);
303         i915_add_request(req);
304
305         return 0;
306 }
307
308 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
309 {
310         struct drm_i915_gem_object *obj = overlay->old_vid_bo;
311
312         i915_gem_object_ggtt_unpin(obj);
313         drm_gem_object_unreference(&obj->base);
314
315         overlay->old_vid_bo = NULL;
316 }
317
318 static void intel_overlay_off_tail(struct intel_overlay *overlay)
319 {
320         struct drm_i915_gem_object *obj = overlay->vid_bo;
321
322         /* never have the overlay hw on without showing a frame */
323         if (WARN_ON(!obj))
324                 return;
325
326         i915_gem_object_ggtt_unpin(obj);
327         drm_gem_object_unreference(&obj->base);
328         overlay->vid_bo = NULL;
329
330         overlay->crtc->overlay = NULL;
331         overlay->crtc = NULL;
332         overlay->active = false;
333 }
334
335 /* overlay needs to be disabled in OCMD reg */
/*
 * Build and submit a request that turns the overlay hardware off, then wait
 * for it with intel_overlay_off_tail recorded as the flip tail.
 *
 * Returns 0 on success or a negative error code from request allocation,
 * ring space reservation or the wait.
 */
336 static int intel_overlay_off(struct intel_overlay *overlay)
337 {
338         struct drm_device *dev = overlay->dev;
339         struct drm_i915_private *dev_priv = dev->dev_private;
340         struct intel_engine_cs *engine = &dev_priv->engine[RCS];
341         struct drm_i915_gem_request *req;
342         u32 flip_addr = overlay->flip_addr;
343         int ret;
344
345         WARN_ON(!overlay->active);
346
347         /* According to intel docs the overlay hw may hang (when switching
348          * off) without loading the filter coeffs. It is however unclear whether
349          * this applies to the disabling of the overlay or to the switching off
350          * of the hw. Do it in both cases */
351         flip_addr |= OFC_UPDATE;
352
353         req = i915_gem_request_alloc(engine, NULL);
354         if (IS_ERR(req))
355                 return PTR_ERR(req);
356
        /* six dwords are emitted on both branches below */
357         ret = intel_ring_begin(req, 6);
358         if (ret) {
359                 i915_add_request_no_flush(req);
360                 return ret;
361         }
362
363         /* wait for overlay to go idle */
364         intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
365         intel_ring_emit(engine, flip_addr);
366         intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
367         /* turn overlay off */
368         if (IS_I830(dev)) {
369                 /* Workaround: Don't disable the overlay fully, since otherwise
370                  * it dies on the next OVERLAY_ON cmd. */
                /* NOOPs keep the emitted length at the reserved six dwords */
371                 intel_ring_emit(engine, MI_NOOP);
372                 intel_ring_emit(engine, MI_NOOP);
373                 intel_ring_emit(engine, MI_NOOP);
374         } else {
375                 intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
376                 intel_ring_emit(engine, flip_addr);
377                 intel_ring_emit(engine,
378                                 MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
379         }
380         intel_ring_advance(engine);
381
382         return intel_overlay_do_wait_request(overlay, req, intel_overlay_off_tail);
383 }
384
385 /* recover from an interruption due to a signal
386  * We have to be careful not to repeat work forever an make forward progess. */
387 static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
388 {
389         int ret;
390
391         if (overlay->last_flip_req == NULL)
392                 return 0;
393
394         ret = i915_wait_request(overlay->last_flip_req);
395         if (ret)
396                 return ret;
397
398         if (overlay->flip_tail)
399                 overlay->flip_tail(overlay);
400
401         i915_gem_request_assign(&overlay->last_flip_req, NULL);
402         return 0;
403 }
404
405 /* Wait for pending overlay flip and release old frame.
406  * Needs to be called before the overlay register are changed
407  * via intel_overlay_(un)map_regs
408  *
 * Must be called with dev->struct_mutex held.  Returns 0 when there is no
 * old frame or it was released, or a negative error code if building or
 * waiting for the synchronisation request failed (the old frame is then
 * kept).
 */
409 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
410 {
411         struct drm_device *dev = overlay->dev;
412         struct drm_i915_private *dev_priv = dev->dev_private;
413         struct intel_engine_cs *engine = &dev_priv->engine[RCS];
414         int ret;
415
416         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
417
418         /* Only wait if there is actually an old frame to release to
419          * guarantee forward progress.
420          */
421         if (!overlay->old_vid_bo)
422                 return 0;
423
        /* a flip is still pending in hardware: queue a wait for it and block */
424         if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
425                 /* synchronous slowpath */
426                 struct drm_i915_gem_request *req;
427
428                 req = i915_gem_request_alloc(engine, NULL);
429                 if (IS_ERR(req))
430                         return PTR_ERR(req);
431
432                 ret = intel_ring_begin(req, 2);
433                 if (ret) {
434                         i915_add_request_no_flush(req);
435                         return ret;
436                 }
437
438                 intel_ring_emit(engine,
439                                 MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
440                 intel_ring_emit(engine, MI_NOOP);
441                 intel_ring_advance(engine);
442
443                 ret = intel_overlay_do_wait_request(overlay, req,
444                                                     intel_overlay_release_old_vid_tail);
445                 if (ret)
446                         return ret;
447         }
448
        /* do_wait_request() does not run the tail itself, so release here */
449         intel_overlay_release_old_vid_tail(overlay);
450
451
        /* NOTE(review): the tail above has already set overlay->old_vid_bo to
         * NULL, so this call always receives NULL as its first argument.
         * Confirm against i915_gem_track_fb() whether that is intended. */
452         i915_gem_track_fb(overlay->old_vid_bo, NULL,
453                           INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
454         return 0;
455 }
456
457 void intel_overlay_reset(struct drm_i915_private *dev_priv)
458 {
459         struct intel_overlay *overlay = dev_priv->overlay;
460
461         if (!overlay)
462                 return;
463
464         intel_overlay_release_old_vid(overlay);
465
466         overlay->last_flip_req = NULL;
467         overlay->old_xscale = 0;
468         overlay->old_yscale = 0;
469         overlay->crtc = NULL;
470         overlay->active = false;
471 }
472
/* Parameters of one frame handed to intel_overlay_do_put_image(). */
473 struct put_image_params {
        /* I915_OVERLAY_* format flags (depth, planar/packed, swap) */
474         int format;
        /* destination window, written to DWINPOS / DWINSZ */
475         short dst_x;
476         short dst_y;
477         short dst_w;
478         short dst_h;
        /* src_w / src_h go to SWIDTH / SHEIGHT, src_scan_* feed the scale factors */
479         short src_w;
480         short src_scan_h;
481         short src_scan_w;
482         short src_h;
        /* strides written to OSTRIDE (UV in the upper 16 bits) */
483         short stride_Y;
484         short stride_UV;
        /* plane offsets added to the buffer's GGTT offset */
485         int offset_Y;
486         int offset_U;
487         int offset_V;
488 };
489
490 static int packed_depth_bytes(u32 format)
491 {
492         switch (format & I915_OVERLAY_DEPTH_MASK) {
493         case I915_OVERLAY_YUV422:
494                 return 4;
495         case I915_OVERLAY_YUV411:
496                 /* return 6; not implemented */
497         default:
498                 return -EINVAL;
499         }
500 }
501
502 static int packed_width_bytes(u32 format, short width)
503 {
504         switch (format & I915_OVERLAY_DEPTH_MASK) {
505         case I915_OVERLAY_YUV422:
506                 return width << 1;
507         default:
508                 return -EINVAL;
509         }
510 }
511
512 static int uv_hsubsampling(u32 format)
513 {
514         switch (format & I915_OVERLAY_DEPTH_MASK) {
515         case I915_OVERLAY_YUV422:
516         case I915_OVERLAY_YUV420:
517                 return 2;
518         case I915_OVERLAY_YUV411:
519         case I915_OVERLAY_YUV410:
520                 return 4;
521         default:
522                 return -EINVAL;
523         }
524 }
525
526 static int uv_vsubsampling(u32 format)
527 {
528         switch (format & I915_OVERLAY_DEPTH_MASK) {
529         case I915_OVERLAY_YUV420:
530         case I915_OVERLAY_YUV410:
531                 return 2;
532         case I915_OVERLAY_YUV422:
533         case I915_OVERLAY_YUV411:
534                 return 1;
535         default:
536                 return -EINVAL;
537         }
538 }
539
540 static u32 calc_swidthsw(struct drm_device *dev, u32 offset, u32 width)
541 {
542         u32 mask, shift, ret;
543         if (IS_GEN2(dev)) {
544                 mask = 0x1f;
545                 shift = 5;
546         } else {
547                 mask = 0x3f;
548                 shift = 6;
549         }
550         ret = ((offset + width + mask) >> shift) - (offset >> shift);
551         if (!IS_GEN2(dev))
552                 ret <<= 1;
553         ret -= 1;
554         return ret << 2;
555 }
556
/*
 * Fixed horizontal filter coefficients for the Y channel, copied verbatim
 * into regs->Y_HCOEFS by update_polyphase_filter().  One row per phase
 * (N_PHASES rows) with N_HORIZ_Y_TAPS values each.
 */
557 static const u16 y_static_hcoeffs[N_HORIZ_Y_TAPS * N_PHASES] = {
558         0x3000, 0xb4a0, 0x1930, 0x1920, 0xb4a0,
559         0x3000, 0xb500, 0x19d0, 0x1880, 0xb440,
560         0x3000, 0xb540, 0x1a88, 0x2f80, 0xb3e0,
561         0x3000, 0xb580, 0x1b30, 0x2e20, 0xb380,
562         0x3000, 0xb5c0, 0x1bd8, 0x2cc0, 0xb320,
563         0x3020, 0xb5e0, 0x1c60, 0x2b80, 0xb2c0,
564         0x3020, 0xb5e0, 0x1cf8, 0x2a20, 0xb260,
565         0x3020, 0xb5e0, 0x1d80, 0x28e0, 0xb200,
566         0x3020, 0xb5c0, 0x1e08, 0x3f40, 0xb1c0,
567         0x3020, 0xb580, 0x1e78, 0x3ce0, 0xb160,
568         0x3040, 0xb520, 0x1ed8, 0x3aa0, 0xb120,
569         0x3040, 0xb4a0, 0x1f30, 0x3880, 0xb0e0,
570         0x3040, 0xb400, 0x1f78, 0x3680, 0xb0a0,
571         0x3020, 0xb340, 0x1fb8, 0x34a0, 0xb060,
572         0x3020, 0xb240, 0x1fe0, 0x32e0, 0xb040,
573         0x3020, 0xb140, 0x1ff8, 0x3160, 0xb020,
574         0xb000, 0x3000, 0x0800, 0x3000, 0xb000
575 };
576
/*
 * Fixed horizontal filter coefficients for the UV channels, copied verbatim
 * into regs->UV_HCOEFS by update_polyphase_filter().  N_HORIZ_UV_TAPS values
 * per phase; the source is laid out six values per row, so each full row
 * holds two phases.
 */
577 static const u16 uv_static_hcoeffs[N_HORIZ_UV_TAPS * N_PHASES] = {
578         0x3000, 0x1800, 0x1800, 0xb000, 0x18d0, 0x2e60,
579         0xb000, 0x1990, 0x2ce0, 0xb020, 0x1a68, 0x2b40,
580         0xb040, 0x1b20, 0x29e0, 0xb060, 0x1bd8, 0x2880,
581         0xb080, 0x1c88, 0x3e60, 0xb0a0, 0x1d28, 0x3c00,
582         0xb0c0, 0x1db8, 0x39e0, 0xb0e0, 0x1e40, 0x37e0,
583         0xb100, 0x1eb8, 0x3620, 0xb100, 0x1f18, 0x34a0,
584         0xb100, 0x1f68, 0x3360, 0xb0e0, 0x1fa8, 0x3240,
585         0xb0c0, 0x1fe0, 0x3140, 0xb060, 0x1ff0, 0x30a0,
586         0x3000, 0x0800, 0x3000
587 };
588
589 static void update_polyphase_filter(struct overlay_registers __iomem *regs)
590 {
591         memcpy_toio(regs->Y_HCOEFS, y_static_hcoeffs, sizeof(y_static_hcoeffs));
592         memcpy_toio(regs->UV_HCOEFS, uv_static_hcoeffs,
593                     sizeof(uv_static_hcoeffs));
594 }
595
596 static bool update_scaling_factors(struct intel_overlay *overlay,
597                                    struct overlay_registers __iomem *regs,
598                                    struct put_image_params *params)
599 {
600         /* fixed point with a 12 bit shift */
601         u32 xscale, yscale, xscale_UV, yscale_UV;
602 #define FP_SHIFT 12
603 #define FRACT_MASK 0xfff
604         bool scale_changed = false;
605         int uv_hscale = uv_hsubsampling(params->format);
606         int uv_vscale = uv_vsubsampling(params->format);
607
608         if (params->dst_w > 1)
609                 xscale = ((params->src_scan_w - 1) << FP_SHIFT)
610                         /(params->dst_w);
611         else
612                 xscale = 1 << FP_SHIFT;
613
614         if (params->dst_h > 1)
615                 yscale = ((params->src_scan_h - 1) << FP_SHIFT)
616                         /(params->dst_h);
617         else
618                 yscale = 1 << FP_SHIFT;
619
620         /*if (params->format & I915_OVERLAY_YUV_PLANAR) {*/
621         xscale_UV = xscale/uv_hscale;
622         yscale_UV = yscale/uv_vscale;
623         /* make the Y scale to UV scale ratio an exact multiply */
624         xscale = xscale_UV * uv_hscale;
625         yscale = yscale_UV * uv_vscale;
626         /*} else {
627           xscale_UV = 0;
628           yscale_UV = 0;
629           }*/
630
631         if (xscale != overlay->old_xscale || yscale != overlay->old_yscale)
632                 scale_changed = true;
633         overlay->old_xscale = xscale;
634         overlay->old_yscale = yscale;
635
636         iowrite32(((yscale & FRACT_MASK) << 20) |
637                   ((xscale >> FP_SHIFT)  << 16) |
638                   ((xscale & FRACT_MASK) << 3),
639                  &regs->YRGBSCALE);
640
641         iowrite32(((yscale_UV & FRACT_MASK) << 20) |
642                   ((xscale_UV >> FP_SHIFT)  << 16) |
643                   ((xscale_UV & FRACT_MASK) << 3),
644                  &regs->UVSCALE);
645
646         iowrite32((((yscale    >> FP_SHIFT) << 16) |
647                    ((yscale_UV >> FP_SHIFT) << 0)),
648                  &regs->UVSCALEV);
649
650         if (scale_changed)
651                 update_polyphase_filter(regs);
652
653         return scale_changed;
654 }
655
656 static void update_colorkey(struct intel_overlay *overlay,
657                             struct overlay_registers __iomem *regs)
658 {
659         u32 key = overlay->color_key;
660         u32 flags;
661
662         flags = 0;
663         if (overlay->color_key_enabled)
664                 flags |= DST_KEY_ENABLE;
665
666         switch (overlay->crtc->base.primary->fb->bits_per_pixel) {
667         case 8:
668                 key = 0;
669                 flags |= CLK_RGB8I_MASK;
670                 break;
671
672         case 16:
673                 if (overlay->crtc->base.primary->fb->depth == 15) {
674                         key = RGB15_TO_COLORKEY(key);
675                         flags |= CLK_RGB15_MASK;
676                 } else {
677                         key = RGB16_TO_COLORKEY(key);
678                         flags |= CLK_RGB16_MASK;
679                 }
680                 break;
681
682         case 24:
683         case 32:
684                 flags |= CLK_RGB24_MASK;
685                 break;
686         }
687
688         iowrite32(key, &regs->DCLRKV);
689         iowrite32(flags, &regs->DCLRKM);
690 }
691
692 static u32 overlay_cmd_reg(struct put_image_params *params)
693 {
694         u32 cmd = OCMD_ENABLE | OCMD_BUF_TYPE_FRAME | OCMD_BUFFER0;
695
696         if (params->format & I915_OVERLAY_YUV_PLANAR) {
697                 switch (params->format & I915_OVERLAY_DEPTH_MASK) {
698                 case I915_OVERLAY_YUV422:
699                         cmd |= OCMD_YUV_422_PLANAR;
700                         break;
701                 case I915_OVERLAY_YUV420:
702                         cmd |= OCMD_YUV_420_PLANAR;
703                         break;
704                 case I915_OVERLAY_YUV411:
705                 case I915_OVERLAY_YUV410:
706                         cmd |= OCMD_YUV_410_PLANAR;
707                         break;
708                 }
709         } else { /* YUV packed */
710                 switch (params->format & I915_OVERLAY_DEPTH_MASK) {
711                 case I915_OVERLAY_YUV422:
712                         cmd |= OCMD_YUV_422_PACKED;
713                         break;
714                 case I915_OVERLAY_YUV411:
715                         cmd |= OCMD_YUV_411_PACKED;
716                         break;
717                 }
718
719                 switch (params->format & I915_OVERLAY_SWAP_MASK) {
720                 case I915_OVERLAY_NO_SWAP:
721                         break;
722                 case I915_OVERLAY_UV_SWAP:
723                         cmd |= OCMD_UV_SWAP;
724                         break;
725                 case I915_OVERLAY_Y_SWAP:
726                         cmd |= OCMD_Y_SWAP;
727                         break;
728                 case I915_OVERLAY_Y_AND_UV_SWAP:
729                         cmd |= OCMD_Y_AND_UV_SWAP;
730                         break;
731                 }
732         }
733
734         return cmd;
735 }
736
/*
 * Show @new_bo on the overlay using the geometry and format in @params.
 *
 * Releases the previous old frame, pins @new_bo for display, switches the
 * overlay on if it is not active yet, programs the register block and
 * queues a flip.  On success @new_bo becomes overlay->vid_bo and the former
 * vid_bo becomes old_vid_bo.  On any failure after pinning, @new_bo is
 * unpinned again and the error code is returned.
 *
 * Expects dev->struct_mutex and the mode config connection_mutex to be held
 * (both are checked with WARN_ON).
 */
737 static int intel_overlay_do_put_image(struct intel_overlay *overlay,
738                                       struct drm_i915_gem_object *new_bo,
739                                       struct put_image_params *params)
740 {
741         int ret, tmp_width;
742         struct overlay_registers __iomem *regs;
743         bool scale_changed = false;
744         struct drm_device *dev = overlay->dev;
745         u32 swidth, swidthsw, sheight, ostride;
746         enum pipe pipe = overlay->crtc->pipe;
747
748         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
749         WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
750
        /* must happen before the register block is touched */
751         ret = intel_overlay_release_old_vid(overlay);
752         if (ret != 0)
753                 return ret;
754
755         ret = i915_gem_object_pin_to_display_plane(new_bo, 0,
756                                                    &i915_ggtt_view_normal);
757         if (ret != 0)
758                 return ret;
759
760         ret = i915_gem_object_put_fence(new_bo);
761         if (ret)
762                 goto out_unpin;
763
        /* first frame: write OCONFIG and turn the overlay on */
764         if (!overlay->active) {
765                 u32 oconfig;
766                 regs = intel_overlay_map_regs(overlay);
767                 if (!regs) {
768                         ret = -ENOMEM;
769                         goto out_unpin;
770                 }
771                 oconfig = OCONF_CC_OUT_8BIT;
772                 if (IS_GEN4(overlay->dev))
773                         oconfig |= OCONF_CSC_MODE_BT709;
774                 oconfig |= pipe == 0 ?
775                         OCONF_PIPE_A : OCONF_PIPE_B;
776                 iowrite32(oconfig, &regs->OCONFIG);
777                 intel_overlay_unmap_regs(overlay, regs);
778
779                 ret = intel_overlay_on(overlay);
780                 if (ret != 0)
781                         goto out_unpin;
782         }
783
784         regs = intel_overlay_map_regs(overlay);
785         if (!regs) {
786                 ret = -ENOMEM;
787                 goto out_unpin;
788         }
789
        /* destination window: y / height in the upper 16 bits */
790         iowrite32((params->dst_y << 16) | params->dst_x, &regs->DWINPOS);
791         iowrite32((params->dst_h << 16) | params->dst_w, &regs->DWINSZ);
792
        /* line width in bytes: packed formats need the per-pixel size applied */
793         if (params->format & I915_OVERLAY_YUV_PACKED)
794                 tmp_width = packed_width_bytes(params->format, params->src_w);
795         else
796                 tmp_width = params->src_w;
797
798         swidth = params->src_w;
799         swidthsw = calc_swidthsw(overlay->dev, params->offset_Y, tmp_width);
800         sheight = params->src_h;
801         iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
802         ostride = params->stride_Y;
803
        /* planar formats: add the UV plane values in the upper 16 bits */
804         if (params->format & I915_OVERLAY_YUV_PLANAR) {
805                 int uv_hscale = uv_hsubsampling(params->format);
806                 int uv_vscale = uv_vsubsampling(params->format);
807                 u32 tmp_U, tmp_V;
808                 swidth |= (params->src_w/uv_hscale) << 16;
809                 tmp_U = calc_swidthsw(overlay->dev, params->offset_U,
810                                       params->src_w/uv_hscale);
811                 tmp_V = calc_swidthsw(overlay->dev, params->offset_V,
812                                       params->src_w/uv_hscale);
                /* one field covers both chroma planes, so use the larger value */
813                 swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
814                 sheight |= (params->src_h/uv_vscale) << 16;
815                 iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
816                 iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
817                 ostride |= params->stride_UV << 16;
818         }
819
820         iowrite32(swidth, &regs->SWIDTH);
821         iowrite32(swidthsw, &regs->SWIDTHSW);
822         iowrite32(sheight, &regs->SHEIGHT);
823         iowrite32(ostride, &regs->OSTRIDE);
824
825         scale_changed = update_scaling_factors(overlay, regs, params);
826
827         update_colorkey(overlay, regs);
828
829         iowrite32(overlay_cmd_reg(params), &regs->OCMD);
830
831         intel_overlay_unmap_regs(overlay, regs);
832
        /* queue the flip; the filter is reloaded only if the scale changed */
833         ret = intel_overlay_continue(overlay, scale_changed);
834         if (ret)
835                 goto out_unpin;
836
837         i915_gem_track_fb(overlay->vid_bo, new_bo,
838                           INTEL_FRONTBUFFER_OVERLAY(pipe));
839
        /* the frame shown so far is released by the next release_old_vid() */
840         overlay->old_vid_bo = overlay->vid_bo;
841         overlay->vid_bo = new_bo;
842
843         intel_frontbuffer_flip(dev,
844                                INTEL_FRONTBUFFER_OVERLAY(pipe));
845
846         return 0;
847
848 out_unpin:
849         i915_gem_object_ggtt_unpin(new_bo);
850         return ret;
851 }
852
853 int intel_overlay_switch_off(struct intel_overlay *overlay)
854 {
855         struct overlay_registers __iomem *regs;
856         struct drm_device *dev = overlay->dev;
857         int ret;
858
859         WARN_ON(!mutex_is_locked(&dev->struct_mutex));
860         WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
861
862         ret = intel_overlay_recover_from_interrupt(overlay);
863         if (ret != 0)
864                 return ret;
865
866         if (!overlay->active)
867                 return 0;
868
869         ret = intel_overlay_release_old_vid(overlay);
870         if (ret != 0)
871                 return ret;
872
873         regs = intel_overlay_map_regs(overlay);
874         iowrite32(0, &regs->OCMD);
875         intel_overlay_unmap_regs(overlay, regs);
876
877         ret = intel_overlay_off(overlay);
878         if (ret != 0)
879                 return ret;
880
881         intel_overlay_off_tail(overlay);
882         return 0;
883 }
884
885 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
886                                           struct intel_crtc *crtc)
887 {
888         if (!crtc->active)
889                 return -EINVAL;
890
891         /* can't use the overlay with double wide pipe */
892         if (crtc->config->double_wide)
893                 return -EINVAL;
894
895         return 0;
896 }
897
898 static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
899 {
900         struct drm_device *dev = overlay->dev;
901         struct drm_i915_private *dev_priv = dev->dev_private;
902         u32 pfit_control = I915_READ(PFIT_CONTROL);
903         u32 ratio;
904
905         /* XXX: This is not the same logic as in the xorg driver, but more in
906          * line with the intel documentation for the i965
907          */
908         if (INTEL_INFO(dev)->gen >= 4) {
909                 /* on i965 use the PGM reg to read out the autoscaler values */
910                 ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
911         } else {
912                 if (pfit_control & VERT_AUTO_SCALE)
913                         ratio = I915_READ(PFIT_AUTO_RATIOS);
914                 else
915                         ratio = I915_READ(PFIT_PGM_RATIOS);
916                 ratio >>= PFIT_VERT_SCALE_SHIFT;
917         }
918
919         overlay->pfit_vscale_ratio = ratio;
920 }
921
922 static int check_overlay_dst(struct intel_overlay *overlay,
923                              struct drm_intel_overlay_put_image *rec)
924 {
925         struct drm_display_mode *mode = &overlay->crtc->base.mode;
926
927         if (rec->dst_x < mode->hdisplay &&
928             rec->dst_x + rec->dst_width <= mode->hdisplay &&
929             rec->dst_y < mode->vdisplay &&
930             rec->dst_y + rec->dst_height <= mode->vdisplay)
931                 return 0;
932         else
933                 return -EINVAL;
934 }
935
936 static int check_overlay_scaling(struct put_image_params *rec)
937 {
938         u32 tmp;
939
940         /* downscaling limit is 8.0 */
941         tmp = ((rec->src_scan_h << 16) / rec->dst_h) >> 16;
942         if (tmp > 7)
943                 return -EINVAL;
944         tmp = ((rec->src_scan_w << 16) / rec->dst_w) >> 16;
945         if (tmp > 7)
946                 return -EINVAL;
947
948         return 0;
949 }
950
951 static int check_overlay_src(struct drm_device *dev,
952                              struct drm_intel_overlay_put_image *rec,
953                              struct drm_i915_gem_object *new_bo)
954 {
955         int uv_hscale = uv_hsubsampling(rec->flags);
956         int uv_vscale = uv_vsubsampling(rec->flags);
957         u32 stride_mask;
958         int depth;
959         u32 tmp;
960
961         /* check src dimensions */
962         if (IS_845G(dev) || IS_I830(dev)) {
963                 if (rec->src_height > IMAGE_MAX_HEIGHT_LEGACY ||
964                     rec->src_width  > IMAGE_MAX_WIDTH_LEGACY)
965                         return -EINVAL;
966         } else {
967                 if (rec->src_height > IMAGE_MAX_HEIGHT ||
968                     rec->src_width  > IMAGE_MAX_WIDTH)
969                         return -EINVAL;
970         }
971
972         /* better safe than sorry, use 4 as the maximal subsampling ratio */
973         if (rec->src_height < N_VERT_Y_TAPS*4 ||
974             rec->src_width  < N_HORIZ_Y_TAPS*4)
975                 return -EINVAL;
976
977         /* check alignment constraints */
978         switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
979         case I915_OVERLAY_RGB:
980                 /* not implemented */
981                 return -EINVAL;
982
983         case I915_OVERLAY_YUV_PACKED:
984                 if (uv_vscale != 1)
985                         return -EINVAL;
986
987                 depth = packed_depth_bytes(rec->flags);
988                 if (depth < 0)
989                         return depth;
990
991                 /* ignore UV planes */
992                 rec->stride_UV = 0;
993                 rec->offset_U = 0;
994                 rec->offset_V = 0;
995                 /* check pixel alignment */
996                 if (rec->offset_Y % depth)
997                         return -EINVAL;
998                 break;
999
1000         case I915_OVERLAY_YUV_PLANAR:
1001                 if (uv_vscale < 0 || uv_hscale < 0)
1002                         return -EINVAL;
1003                 /* no offset restrictions for planar formats */
1004                 break;
1005
1006         default:
1007                 return -EINVAL;
1008         }
1009
1010         if (rec->src_width % uv_hscale)
1011                 return -EINVAL;
1012
1013         /* stride checking */
1014         if (IS_I830(dev) || IS_845G(dev))
1015                 stride_mask = 255;
1016         else
1017                 stride_mask = 63;
1018
1019         if (rec->stride_Y & stride_mask || rec->stride_UV & stride_mask)
1020                 return -EINVAL;
1021         if (IS_GEN4(dev) && rec->stride_Y < 512)
1022                 return -EINVAL;
1023
1024         tmp = (rec->flags & I915_OVERLAY_TYPE_MASK) == I915_OVERLAY_YUV_PLANAR ?
1025                 4096 : 8192;
1026         if (rec->stride_Y > tmp || rec->stride_UV > 2*1024)
1027                 return -EINVAL;
1028
1029         /* check buffer dimensions */
1030         switch (rec->flags & I915_OVERLAY_TYPE_MASK) {
1031         case I915_OVERLAY_RGB:
1032         case I915_OVERLAY_YUV_PACKED:
1033                 /* always 4 Y values per depth pixels */
1034                 if (packed_width_bytes(rec->flags, rec->src_width) > rec->stride_Y)
1035                         return -EINVAL;
1036
1037                 tmp = rec->stride_Y*rec->src_height;
1038                 if (rec->offset_Y + tmp > new_bo->base.size)
1039                         return -EINVAL;
1040                 break;
1041
1042         case I915_OVERLAY_YUV_PLANAR:
1043                 if (rec->src_width > rec->stride_Y)
1044                         return -EINVAL;
1045                 if (rec->src_width/uv_hscale > rec->stride_UV)
1046                         return -EINVAL;
1047
1048                 tmp = rec->stride_Y * rec->src_height;
1049                 if (rec->offset_Y + tmp > new_bo->base.size)
1050                         return -EINVAL;
1051
1052                 tmp = rec->stride_UV * (rec->src_height / uv_vscale);
1053                 if (rec->offset_U + tmp > new_bo->base.size ||
1054                     rec->offset_V + tmp > new_bo->base.size)
1055                         return -EINVAL;
1056                 break;
1057         }
1058
1059         return 0;
1060 }
1061
1062 /**
1063  * Return the pipe currently connected to the panel fitter,
1064  * or -1 if the panel fitter is not present or not in use
1065  */
1066 static int intel_panel_fitter_pipe(struct drm_device *dev)
1067 {
1068         struct drm_i915_private *dev_priv = dev->dev_private;
1069         u32  pfit_control;
1070
1071         /* i830 doesn't have a panel fitter */
1072         if (INTEL_INFO(dev)->gen <= 3 && (IS_I830(dev) || !IS_MOBILE(dev)))
1073                 return -1;
1074
1075         pfit_control = I915_READ(PFIT_CONTROL);
1076
1077         /* See if the panel fitter is in use */
1078         if ((pfit_control & PFIT_ENABLE) == 0)
1079                 return -1;
1080
1081         /* 965 can place panel fitter on either pipe */
1082         if (IS_GEN4(dev))
1083                 return (pfit_control >> 29) & 0x3;
1084
1085         /* older chips can only use pipe 1 */
1086         return 1;
1087 }
1088
1089 int intel_overlay_put_image(struct drm_device *dev, void *data,
1090                             struct drm_file *file_priv)
1091 {
1092         struct drm_intel_overlay_put_image *put_image_rec = data;
1093         struct drm_i915_private *dev_priv = dev->dev_private;
1094         struct intel_overlay *overlay;
1095         struct drm_crtc *drmmode_crtc;
1096         struct intel_crtc *crtc;
1097         struct drm_i915_gem_object *new_bo;
1098         struct put_image_params *params;
1099         int ret;
1100
1101         overlay = dev_priv->overlay;
1102         if (!overlay) {
1103                 DRM_DEBUG("userspace bug: no overlay\n");
1104                 return -ENODEV;
1105         }
1106
1107         if (!(put_image_rec->flags & I915_OVERLAY_ENABLE)) {
1108                 drm_modeset_lock_all(dev);
1109                 mutex_lock(&dev->struct_mutex);
1110
1111                 ret = intel_overlay_switch_off(overlay);
1112
1113                 mutex_unlock(&dev->struct_mutex);
1114                 drm_modeset_unlock_all(dev);
1115
1116                 return ret;
1117         }
1118
1119         params = kmalloc(sizeof(*params), GFP_KERNEL);
1120         if (!params)
1121                 return -ENOMEM;
1122
1123         drmmode_crtc = drm_crtc_find(dev, put_image_rec->crtc_id);
1124         if (!drmmode_crtc) {
1125                 ret = -ENOENT;
1126                 goto out_free;
1127         }
1128         crtc = to_intel_crtc(drmmode_crtc);
1129
1130         new_bo = to_intel_bo(drm_gem_object_lookup(file_priv,
1131                                                    put_image_rec->bo_handle));
1132         if (&new_bo->base == NULL) {
1133                 ret = -ENOENT;
1134                 goto out_free;
1135         }
1136
1137         drm_modeset_lock_all(dev);
1138         mutex_lock(&dev->struct_mutex);
1139
1140         if (new_bo->tiling_mode) {
1141                 DRM_DEBUG_KMS("buffer used for overlay image can not be tiled\n");
1142                 ret = -EINVAL;
1143                 goto out_unlock;
1144         }
1145
1146         ret = intel_overlay_recover_from_interrupt(overlay);
1147         if (ret != 0)
1148                 goto out_unlock;
1149
1150         if (overlay->crtc != crtc) {
1151                 struct drm_display_mode *mode = &crtc->base.mode;
1152                 ret = intel_overlay_switch_off(overlay);
1153                 if (ret != 0)
1154                         goto out_unlock;
1155
1156                 ret = check_overlay_possible_on_crtc(overlay, crtc);
1157                 if (ret != 0)
1158                         goto out_unlock;
1159
1160                 overlay->crtc = crtc;
1161                 crtc->overlay = overlay;
1162
1163                 /* line too wide, i.e. one-line-mode */
1164                 if (mode->hdisplay > 1024 &&
1165                     intel_panel_fitter_pipe(dev) == crtc->pipe) {
1166                         overlay->pfit_active = true;
1167                         update_pfit_vscale_ratio(overlay);
1168                 } else
1169                         overlay->pfit_active = false;
1170         }
1171
1172         ret = check_overlay_dst(overlay, put_image_rec);
1173         if (ret != 0)
1174                 goto out_unlock;
1175
1176         if (overlay->pfit_active) {
1177                 params->dst_y = ((((u32)put_image_rec->dst_y) << 12) /
1178                                  overlay->pfit_vscale_ratio);
1179                 /* shifting right rounds downwards, so add 1 */
1180                 params->dst_h = ((((u32)put_image_rec->dst_height) << 12) /
1181                                  overlay->pfit_vscale_ratio) + 1;
1182         } else {
1183                 params->dst_y = put_image_rec->dst_y;
1184                 params->dst_h = put_image_rec->dst_height;
1185         }
1186         params->dst_x = put_image_rec->dst_x;
1187         params->dst_w = put_image_rec->dst_width;
1188
1189         params->src_w = put_image_rec->src_width;
1190         params->src_h = put_image_rec->src_height;
1191         params->src_scan_w = put_image_rec->src_scan_width;
1192         params->src_scan_h = put_image_rec->src_scan_height;
1193         if (params->src_scan_h > params->src_h ||
1194             params->src_scan_w > params->src_w) {
1195                 ret = -EINVAL;
1196                 goto out_unlock;
1197         }
1198
1199         ret = check_overlay_src(dev, put_image_rec, new_bo);
1200         if (ret != 0)
1201                 goto out_unlock;
1202         params->format = put_image_rec->flags & ~I915_OVERLAY_FLAGS_MASK;
1203         params->stride_Y = put_image_rec->stride_Y;
1204         params->stride_UV = put_image_rec->stride_UV;
1205         params->offset_Y = put_image_rec->offset_Y;
1206         params->offset_U = put_image_rec->offset_U;
1207         params->offset_V = put_image_rec->offset_V;
1208
1209         /* Check scaling after src size to prevent a divide-by-zero. */
1210         ret = check_overlay_scaling(params);
1211         if (ret != 0)
1212                 goto out_unlock;
1213
1214         ret = intel_overlay_do_put_image(overlay, new_bo, params);
1215         if (ret != 0)
1216                 goto out_unlock;
1217
1218         mutex_unlock(&dev->struct_mutex);
1219         drm_modeset_unlock_all(dev);
1220
1221         kfree(params);
1222
1223         return 0;
1224
1225 out_unlock:
1226         mutex_unlock(&dev->struct_mutex);
1227         drm_modeset_unlock_all(dev);
1228         drm_gem_object_unreference_unlocked(&new_bo->base);
1229 out_free:
1230         kfree(params);
1231
1232         return ret;
1233 }
1234
1235 static void update_reg_attrs(struct intel_overlay *overlay,
1236                              struct overlay_registers __iomem *regs)
1237 {
1238         iowrite32((overlay->contrast << 18) | (overlay->brightness & 0xff),
1239                   &regs->OCLRC0);
1240         iowrite32(overlay->saturation, &regs->OCLRC1);
1241 }
1242
1243 static bool check_gamma_bounds(u32 gamma1, u32 gamma2)
1244 {
1245         int i;
1246
1247         if (gamma1 & 0xff000000 || gamma2 & 0xff000000)
1248                 return false;
1249
1250         for (i = 0; i < 3; i++) {
1251                 if (((gamma1 >> i*8) & 0xff) >= ((gamma2 >> i*8) & 0xff))
1252                         return false;
1253         }
1254
1255         return true;
1256 }
1257
1258 static bool check_gamma5_errata(u32 gamma5)
1259 {
1260         int i;
1261
1262         for (i = 0; i < 3; i++) {
1263                 if (((gamma5 >> i*8) & 0xff) == 0x80)
1264                         return false;
1265         }
1266
1267         return true;
1268 }
1269
1270 static int check_gamma(struct drm_intel_overlay_attrs *attrs)
1271 {
1272         if (!check_gamma_bounds(0, attrs->gamma0) ||
1273             !check_gamma_bounds(attrs->gamma0, attrs->gamma1) ||
1274             !check_gamma_bounds(attrs->gamma1, attrs->gamma2) ||
1275             !check_gamma_bounds(attrs->gamma2, attrs->gamma3) ||
1276             !check_gamma_bounds(attrs->gamma3, attrs->gamma4) ||
1277             !check_gamma_bounds(attrs->gamma4, attrs->gamma5) ||
1278             !check_gamma_bounds(attrs->gamma5, 0x00ffffff))
1279                 return -EINVAL;
1280
1281         if (!check_gamma5_errata(attrs->gamma5))
1282                 return -EINVAL;
1283
1284         return 0;
1285 }
1286
1287 int intel_overlay_attrs(struct drm_device *dev, void *data,
1288                         struct drm_file *file_priv)
1289 {
1290         struct drm_intel_overlay_attrs *attrs = data;
1291         struct drm_i915_private *dev_priv = dev->dev_private;
1292         struct intel_overlay *overlay;
1293         struct overlay_registers __iomem *regs;
1294         int ret;
1295
1296         overlay = dev_priv->overlay;
1297         if (!overlay) {
1298                 DRM_DEBUG("userspace bug: no overlay\n");
1299                 return -ENODEV;
1300         }
1301
1302         drm_modeset_lock_all(dev);
1303         mutex_lock(&dev->struct_mutex);
1304
1305         ret = -EINVAL;
1306         if (!(attrs->flags & I915_OVERLAY_UPDATE_ATTRS)) {
1307                 attrs->color_key  = overlay->color_key;
1308                 attrs->brightness = overlay->brightness;
1309                 attrs->contrast   = overlay->contrast;
1310                 attrs->saturation = overlay->saturation;
1311
1312                 if (!IS_GEN2(dev)) {
1313                         attrs->gamma0 = I915_READ(OGAMC0);
1314                         attrs->gamma1 = I915_READ(OGAMC1);
1315                         attrs->gamma2 = I915_READ(OGAMC2);
1316                         attrs->gamma3 = I915_READ(OGAMC3);
1317                         attrs->gamma4 = I915_READ(OGAMC4);
1318                         attrs->gamma5 = I915_READ(OGAMC5);
1319                 }
1320         } else {
1321                 if (attrs->brightness < -128 || attrs->brightness > 127)
1322                         goto out_unlock;
1323                 if (attrs->contrast > 255)
1324                         goto out_unlock;
1325                 if (attrs->saturation > 1023)
1326                         goto out_unlock;
1327
1328                 overlay->color_key  = attrs->color_key;
1329                 overlay->brightness = attrs->brightness;
1330                 overlay->contrast   = attrs->contrast;
1331                 overlay->saturation = attrs->saturation;
1332
1333                 regs = intel_overlay_map_regs(overlay);
1334                 if (!regs) {
1335                         ret = -ENOMEM;
1336                         goto out_unlock;
1337                 }
1338
1339                 update_reg_attrs(overlay, regs);
1340
1341                 intel_overlay_unmap_regs(overlay, regs);
1342
1343                 if (attrs->flags & I915_OVERLAY_UPDATE_GAMMA) {
1344                         if (IS_GEN2(dev))
1345                                 goto out_unlock;
1346
1347                         if (overlay->active) {
1348                                 ret = -EBUSY;
1349                                 goto out_unlock;
1350                         }
1351
1352                         ret = check_gamma(attrs);
1353                         if (ret)
1354                                 goto out_unlock;
1355
1356                         I915_WRITE(OGAMC0, attrs->gamma0);
1357                         I915_WRITE(OGAMC1, attrs->gamma1);
1358                         I915_WRITE(OGAMC2, attrs->gamma2);
1359                         I915_WRITE(OGAMC3, attrs->gamma3);
1360                         I915_WRITE(OGAMC4, attrs->gamma4);
1361                         I915_WRITE(OGAMC5, attrs->gamma5);
1362                 }
1363         }
1364         overlay->color_key_enabled = (attrs->flags & I915_OVERLAY_DISABLE_DEST_COLORKEY) == 0;
1365
1366         ret = 0;
1367 out_unlock:
1368         mutex_unlock(&dev->struct_mutex);
1369         drm_modeset_unlock_all(dev);
1370
1371         return ret;
1372 }
1373
1374 void intel_setup_overlay(struct drm_device *dev)
1375 {
1376         struct drm_i915_private *dev_priv = dev->dev_private;
1377         struct intel_overlay *overlay;
1378         struct drm_i915_gem_object *reg_bo;
1379         struct overlay_registers __iomem *regs;
1380         int ret;
1381
1382         if (!HAS_OVERLAY(dev))
1383                 return;
1384
1385         overlay = kzalloc(sizeof(*overlay), GFP_KERNEL);
1386         if (!overlay)
1387                 return;
1388
1389         mutex_lock(&dev->struct_mutex);
1390         if (WARN_ON(dev_priv->overlay))
1391                 goto out_free;
1392
1393         overlay->dev = dev;
1394
1395         reg_bo = NULL;
1396         if (!OVERLAY_NEEDS_PHYSICAL(dev))
1397                 reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
1398         if (reg_bo == NULL)
1399                 reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
1400         if (reg_bo == NULL)
1401                 goto out_free;
1402         overlay->reg_bo = reg_bo;
1403
1404         if (OVERLAY_NEEDS_PHYSICAL(dev)) {
1405                 ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
1406                 if (ret) {
1407                         DRM_ERROR("failed to attach phys overlay regs\n");
1408                         goto out_free_bo;
1409                 }
1410                 overlay->flip_addr = reg_bo->phys_handle->busaddr;
1411         } else {
1412                 ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
1413                 if (ret) {
1414                         DRM_ERROR("failed to pin overlay register bo\n");
1415                         goto out_free_bo;
1416                 }
1417                 overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
1418
1419                 ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
1420                 if (ret) {
1421                         DRM_ERROR("failed to move overlay register bo into the GTT\n");
1422                         goto out_unpin_bo;
1423                 }
1424         }
1425
1426         /* init all values */
1427         overlay->color_key = 0x0101fe;
1428         overlay->color_key_enabled = true;
1429         overlay->brightness = -19;
1430         overlay->contrast = 75;
1431         overlay->saturation = 146;
1432
1433         regs = intel_overlay_map_regs(overlay);
1434         if (!regs)
1435                 goto out_unpin_bo;
1436
1437         memset_io(regs, 0, sizeof(struct overlay_registers));
1438         update_polyphase_filter(regs);
1439         update_reg_attrs(overlay, regs);
1440
1441         intel_overlay_unmap_regs(overlay, regs);
1442
1443         dev_priv->overlay = overlay;
1444         mutex_unlock(&dev->struct_mutex);
1445         DRM_INFO("initialized overlay support\n");
1446         return;
1447
1448 out_unpin_bo:
1449         if (!OVERLAY_NEEDS_PHYSICAL(dev))
1450                 i915_gem_object_ggtt_unpin(reg_bo);
1451 out_free_bo:
1452         drm_gem_object_unreference(&reg_bo->base);
1453 out_free:
1454         mutex_unlock(&dev->struct_mutex);
1455         kfree(overlay);
1456         return;
1457 }
1458
1459 void intel_cleanup_overlay(struct drm_device *dev)
1460 {
1461         struct drm_i915_private *dev_priv = dev->dev_private;
1462
1463         if (!dev_priv->overlay)
1464                 return;
1465
1466         /* The bo's should be free'd by the generic code already.
1467          * Furthermore modesetting teardown happens beforehand so the
1468          * hardware should be off already */
1469         WARN_ON(dev_priv->overlay->active);
1470
1471         drm_gem_object_unreference_unlocked(&dev_priv->overlay->reg_bo->base);
1472         kfree(dev_priv->overlay);
1473 }
1474
1475 struct intel_overlay_error_state {
1476         struct overlay_registers regs;
1477         unsigned long base;
1478         u32 dovsta;
1479         u32 isr;
1480 };
1481
1482 static struct overlay_registers __iomem *
1483 intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
1484 {
1485         struct drm_i915_private *dev_priv = to_i915(overlay->dev);
1486         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1487         struct overlay_registers __iomem *regs;
1488
1489         if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1490                 /* Cast to make sparse happy, but it's wc memory anyway, so
1491                  * equivalent to the wc io mapping on X86. */
1492                 regs = (struct overlay_registers __iomem *)
1493                         overlay->reg_bo->phys_handle->vaddr;
1494         else
1495                 regs = io_mapping_map_atomic_wc(ggtt->mappable,
1496                                                 i915_gem_obj_ggtt_offset(overlay->reg_bo));
1497
1498         return regs;
1499 }
1500
1501 static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
1502                                         struct overlay_registers __iomem *regs)
1503 {
1504         if (!OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1505                 io_mapping_unmap_atomic(regs);
1506 }
1507
1508
1509 struct intel_overlay_error_state *
1510 intel_overlay_capture_error_state(struct drm_device *dev)
1511 {
1512         struct drm_i915_private *dev_priv = dev->dev_private;
1513         struct intel_overlay *overlay = dev_priv->overlay;
1514         struct intel_overlay_error_state *error;
1515         struct overlay_registers __iomem *regs;
1516
1517         if (!overlay || !overlay->active)
1518                 return NULL;
1519
1520         error = kmalloc(sizeof(*error), GFP_ATOMIC);
1521         if (error == NULL)
1522                 return NULL;
1523
1524         error->dovsta = I915_READ(DOVSTA);
1525         error->isr = I915_READ(ISR);
1526         if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
1527                 error->base = (__force long)overlay->reg_bo->phys_handle->vaddr;
1528         else
1529                 error->base = i915_gem_obj_ggtt_offset(overlay->reg_bo);
1530
1531         regs = intel_overlay_map_regs_atomic(overlay);
1532         if (!regs)
1533                 goto err;
1534
1535         memcpy_fromio(&error->regs, regs, sizeof(struct overlay_registers));
1536         intel_overlay_unmap_regs_atomic(overlay, regs);
1537
1538         return error;
1539
1540 err:
1541         kfree(error);
1542         return NULL;
1543 }
1544
1545 void
1546 intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
1547                                 struct intel_overlay_error_state *error)
1548 {
1549         i915_error_printf(m, "Overlay, status: 0x%08x, interrupt: 0x%08x\n",
1550                           error->dovsta, error->isr);
1551         i915_error_printf(m, "  Register file at 0x%08lx:\n",
1552                           error->base);
1553
1554 #define P(x) i915_error_printf(m, "    " #x ":  0x%08x\n", error->regs.x)
1555         P(OBUF_0Y);
1556         P(OBUF_1Y);
1557         P(OBUF_0U);
1558         P(OBUF_0V);
1559         P(OBUF_1U);
1560         P(OBUF_1V);
1561         P(OSTRIDE);
1562         P(YRGB_VPH);
1563         P(UV_VPH);
1564         P(HORZ_PH);
1565         P(INIT_PHS);
1566         P(DWINPOS);
1567         P(DWINSZ);
1568         P(SWIDTH);
1569         P(SWIDTHSW);
1570         P(SHEIGHT);
1571         P(YRGBSCALE);
1572         P(UVSCALE);
1573         P(OCLRC0);
1574         P(OCLRC1);
1575         P(DCLRKV);
1576         P(DCLRKM);
1577         P(SCLRKVH);
1578         P(SCLRKVL);
1579         P(SCLRKEN);
1580         P(OCONFIG);
1581         P(OCMD);
1582         P(OSTART_0Y);
1583         P(OSTART_1Y);
1584         P(OSTART_0U);
1585         P(OSTART_0V);
1586         P(OSTART_1U);
1587         P(OSTART_1V);
1588         P(OTILEOFF_0Y);
1589         P(OTILEOFF_1Y);
1590         P(OTILEOFF_0U);
1591         P(OTILEOFF_0V);
1592         P(OTILEOFF_1U);
1593         P(OTILEOFF_1V);
1594         P(FASTHSCALE);
1595         P(UVSCALEV);
1596 #undef P
1597 }