1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include <drm/drmP.h>
31 #include <drm/drm_buffer.h>
32 #include <drm/radeon_drm.h>
33 #include "radeon_drv.h"
34
35 /* ================================================================
36  * Helper functions for client state checking and fixup
37  */
38
39 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
40                                                     dev_priv,
41                                                     struct drm_file * file_priv,
42                                                     u32 *offset)
43 {
44         u64 off = *offset;
45         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         /* Hrm ... the story of the offset ... So this function converts
49          * the various ideas of what userland clients might have for an
50          * offset in the card address space into an offset into the card
51          * address space :) So with a sane client, it should just keep
52          * the value intact and just do some boundary checking. However,
53          * not all clients are sane. Some older clients pass us 0 based
54          * offsets relative to the start of the framebuffer and some may
55  * assume the AGP aperture is appended to the framebuffer, so we
56          * try to detect those cases and fix them up.
57          *
58          * Note: It might be a good idea here to make sure the offset lands
59          * in some "allowed" area to protect things like the PCIE GART...
60          */
61
62         /* First, the best case, the offset already lands in either the
63          * framebuffer or the GART mapped space
64          */
65         if (radeon_check_offset(dev_priv, off))
66                 return 0;
67
68         /* Ok, that didn't happen... now check if we have a zero based
69          * offset that fits in the framebuffer + gart space, apply the
70          * magic offset we get from SETPARAM or calculated from fb_location
71          */
72         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
73                 radeon_priv = file_priv->driver_priv;
74                 off += radeon_priv->radeon_fb_delta;
75         }
76
77         /* Finally, assume we aimed at a GART offset if beyond the fb */
78         if (off > fb_end)
79                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
80
81         /* Now recheck and fail if out of bounds */
82         if (radeon_check_offset(dev_priv, off)) {
83                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
84                 *offset = off;
85                 return 0;
86         }
87         return -EINVAL;
88 }
89
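/* Validate one legacy state packet from a client command buffer.  For
 * packet IDs that carry GPU memory offsets, bounds-check and fix up each
 * offset via radeon_check_and_fixup_offset(); IDs without offsets pass
 * through unchanged and unknown IDs are rejected.
 */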
90 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
91                                                      dev_priv,
92                                                      struct drm_file *file_priv,
93                                                      int id, struct drm_buffer *buf)
94 {
95         u32 *data;
96         switch (id) {
97
98         case RADEON_EMIT_PP_MISC:
99                 data = drm_buffer_pointer_to_dword(buf,
100                         (RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
101
102                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
103                         DRM_ERROR("Invalid depth buffer offset\n");
104                         return -EINVAL;
105                 }
106                 dev_priv->have_z_offset = 1;
107                 break;
108
109         case RADEON_EMIT_PP_CNTL:
110                 data = drm_buffer_pointer_to_dword(buf,
111                         (RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
112
113                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
114                         DRM_ERROR("Invalid colour buffer offset\n");
115                         return -EINVAL;
116                 }
117                 break;
118
119         case R200_EMIT_PP_TXOFFSET_0:
120         case R200_EMIT_PP_TXOFFSET_1:
121         case R200_EMIT_PP_TXOFFSET_2:
122         case R200_EMIT_PP_TXOFFSET_3:
123         case R200_EMIT_PP_TXOFFSET_4:
124         case R200_EMIT_PP_TXOFFSET_5:
125                 data = drm_buffer_pointer_to_dword(buf, 0);
126                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
127                         DRM_ERROR("Invalid R200 texture offset\n");
128                         return -EINVAL;
129                 }
130                 break;
131
132         case RADEON_EMIT_PP_TXFILTER_0:
133         case RADEON_EMIT_PP_TXFILTER_1:
134         case RADEON_EMIT_PP_TXFILTER_2:
135                 data = drm_buffer_pointer_to_dword(buf,
136                         (RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
137                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
138                         DRM_ERROR("Invalid R100 texture offset\n");
139                         return -EINVAL;
140                 }
141                 break;
142
143         case R200_EMIT_PP_CUBIC_OFFSETS_0:
144         case R200_EMIT_PP_CUBIC_OFFSETS_1:
145         case R200_EMIT_PP_CUBIC_OFFSETS_2:
146         case R200_EMIT_PP_CUBIC_OFFSETS_3:
147         case R200_EMIT_PP_CUBIC_OFFSETS_4:
148         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
149                         int i;
150                         for (i = 0; i < 5; i++) {
151                                 data = drm_buffer_pointer_to_dword(buf, i);
152                                 if (radeon_check_and_fixup_offset(dev_priv,
153                                                                   file_priv,
154                                                                   data)) {
155                                         DRM_ERROR
156                                             ("Invalid R200 cubic texture offset\n");
157                                         return -EINVAL;
158                                 }
159                         }
160                         break;
161                 }
162
163         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
164         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
165         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
166                         int i;
167                         for (i = 0; i < 5; i++) {
168                                 data = drm_buffer_pointer_to_dword(buf, i);
169                                 if (radeon_check_and_fixup_offset(dev_priv,
170                                                                   file_priv,
171                                                                   data)) {
172                                         DRM_ERROR
173                                             ("Invalid R100 cubic texture offset\n");
174                                         return -EINVAL;
175                                 }
176                         }
177                 }
178                 break;
179
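        /* R200 VAP_CTL updates appear to need a TCL state flush
         * beforehand, so emit one here rather than relying on the
         * client to do it.
         */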
180         case R200_EMIT_VAP_CTL:{
181                         RING_LOCALS;
182                         BEGIN_RING(2);
183                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
184                         ADVANCE_RING();
185                 }
186                 break;
187
188         case RADEON_EMIT_RB3D_COLORPITCH:
189         case RADEON_EMIT_RE_LINE_PATTERN:
190         case RADEON_EMIT_SE_LINE_WIDTH:
191         case RADEON_EMIT_PP_LUM_MATRIX:
192         case RADEON_EMIT_PP_ROT_MATRIX_0:
193         case RADEON_EMIT_RB3D_STENCILREFMASK:
194         case RADEON_EMIT_SE_VPORT_XSCALE:
195         case RADEON_EMIT_SE_CNTL:
196         case RADEON_EMIT_SE_CNTL_STATUS:
197         case RADEON_EMIT_RE_MISC:
198         case RADEON_EMIT_PP_BORDER_COLOR_0:
199         case RADEON_EMIT_PP_BORDER_COLOR_1:
200         case RADEON_EMIT_PP_BORDER_COLOR_2:
201         case RADEON_EMIT_SE_ZBIAS_FACTOR:
202         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
203         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
204         case R200_EMIT_PP_TXCBLEND_0:
205         case R200_EMIT_PP_TXCBLEND_1:
206         case R200_EMIT_PP_TXCBLEND_2:
207         case R200_EMIT_PP_TXCBLEND_3:
208         case R200_EMIT_PP_TXCBLEND_4:
209         case R200_EMIT_PP_TXCBLEND_5:
210         case R200_EMIT_PP_TXCBLEND_6:
211         case R200_EMIT_PP_TXCBLEND_7:
212         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
213         case R200_EMIT_TFACTOR_0:
214         case R200_EMIT_VTX_FMT_0:
215         case R200_EMIT_MATRIX_SELECT_0:
216         case R200_EMIT_TEX_PROC_CTL_2:
217         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
218         case R200_EMIT_PP_TXFILTER_0:
219         case R200_EMIT_PP_TXFILTER_1:
220         case R200_EMIT_PP_TXFILTER_2:
221         case R200_EMIT_PP_TXFILTER_3:
222         case R200_EMIT_PP_TXFILTER_4:
223         case R200_EMIT_PP_TXFILTER_5:
224         case R200_EMIT_VTE_CNTL:
225         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
226         case R200_EMIT_PP_TAM_DEBUG3:
227         case R200_EMIT_PP_CNTL_X:
228         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
229         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
230         case R200_EMIT_RE_SCISSOR_TL_0:
231         case R200_EMIT_RE_SCISSOR_TL_1:
232         case R200_EMIT_RE_SCISSOR_TL_2:
233         case R200_EMIT_SE_VAP_CNTL_STATUS:
234         case R200_EMIT_SE_VTX_STATE_CNTL:
235         case R200_EMIT_RE_POINTSIZE:
236         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
237         case R200_EMIT_PP_CUBIC_FACES_0:
238         case R200_EMIT_PP_CUBIC_FACES_1:
239         case R200_EMIT_PP_CUBIC_FACES_2:
240         case R200_EMIT_PP_CUBIC_FACES_3:
241         case R200_EMIT_PP_CUBIC_FACES_4:
242         case R200_EMIT_PP_CUBIC_FACES_5:
243         case RADEON_EMIT_PP_TEX_SIZE_0:
244         case RADEON_EMIT_PP_TEX_SIZE_1:
245         case RADEON_EMIT_PP_TEX_SIZE_2:
246         case R200_EMIT_RB3D_BLENDCOLOR:
247         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
248         case RADEON_EMIT_PP_CUBIC_FACES_0:
249         case RADEON_EMIT_PP_CUBIC_FACES_1:
250         case RADEON_EMIT_PP_CUBIC_FACES_2:
251         case R200_EMIT_PP_TRI_PERF_CNTL:
252         case R200_EMIT_PP_AFS_0:
253         case R200_EMIT_PP_AFS_1:
254         case R200_EMIT_ATF_TFACTOR:
255         case R200_EMIT_PP_TXCTLALL_0:
256         case R200_EMIT_PP_TXCTLALL_1:
257         case R200_EMIT_PP_TXCTLALL_2:
258         case R200_EMIT_PP_TXCTLALL_3:
259         case R200_EMIT_PP_TXCTLALL_4:
260         case R200_EMIT_PP_TXCTLALL_5:
261         case R200_EMIT_VAP_PVS_CNTL:
262                 /* These packets don't contain memory offsets */
263                 break;
264
265         default:
266                 DRM_ERROR("Unknown state packet ID %d\n", id);
267                 return -EINVAL;
268         }
269
270         return 0;
271 }
272
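/* Validate the type-3 CP packet at the head of the client command buffer,
 * return its total size in dwords through *cmdsz, and fix up any GPU
 * offsets it contains so they stay within the framebuffer or GART ranges.
 */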
273 static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
274                                           dev_priv,
275                                           struct drm_file *file_priv,
276                                           drm_radeon_kcmd_buffer_t *
277                                           cmdbuf,
278                                           unsigned int *cmdsz)
279 {
280         u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
281         u32 offset, narrays;
282         int count, i, k;
283
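        /* The packet's count field is the number of dwords following the
         * header minus one, so the whole packet spans count + 2 dwords
         * including the header itself.
         */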
284         count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
285         *cmdsz = 2 + count;
286
287         if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
288                 DRM_ERROR("Not a type 3 packet\n");
289                 return -EINVAL;
290         }
291
292         if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
293                 DRM_ERROR("Packet size larger than size of data provided\n");
294                 return -EINVAL;
295         }
296
297         switch (*cmd & 0xff00) {
298         /* XXX Are there old drivers needing other packets? */
299
300         case RADEON_3D_DRAW_IMMD:
301         case RADEON_3D_DRAW_VBUF:
302         case RADEON_3D_DRAW_INDX:
303         case RADEON_WAIT_FOR_IDLE:
304         case RADEON_CP_NOP:
305         case RADEON_3D_CLEAR_ZMASK:
306 /*      case RADEON_CP_NEXT_CHAR:
307         case RADEON_CP_PLY_NEXTSCAN:
308         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
309                 /* these packets are safe */
310                 break;
311
312         case RADEON_CP_3D_DRAW_IMMD_2:
313         case RADEON_CP_3D_DRAW_VBUF_2:
314         case RADEON_CP_3D_DRAW_INDX_2:
315         case RADEON_3D_CLEAR_HIZ:
316                 /* safe but r200 only */
317                 if (dev_priv->microcode_version != UCODE_R200) {
318                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
319                         return -EINVAL;
320                 }
321                 break;
322
323         case RADEON_3D_LOAD_VBPNTR:
324
325                 if (count > 18) { /* 12 arrays max */
326                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
327                                   count);
328                         return -EINVAL;
329                 }
330
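                /* Payload layout: dword 1 carries the array count in its
                 * low bits; each pair of arrays then takes three dwords
                 * (a shared attribute dword followed by one offset per
                 * array), with a trailing odd array taking two.
                 */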
331                 /* carefully check packet contents */
332                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
333
334                 narrays = *cmd & ~0xc000;
335                 k = 0;
336                 i = 2;
337                 while ((k < narrays) && (i < (count + 2))) {
338                         i++;            /* skip attribute field */
339                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
340                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
341                                                           cmd)) {
342                                 DRM_ERROR
343                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
344                                      k, i);
345                                 return -EINVAL;
346                         }
347                         k++;
348                         i++;
349                         if (k == narrays)
350                                 break;
351                         /* have one more to process, they come in pairs */
352                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
353
354                         if (radeon_check_and_fixup_offset(dev_priv,
355                                                           file_priv, cmd))
356                         {
357                                 DRM_ERROR
358                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
359                                      k, i);
360                                 return -EINVAL;
361                         }
362                         k++;
363                         i++;
364                 }
365                 /* do the counts match what we expect ? */
366                 if ((k != narrays) || (i != (count + 2))) {
367                         DRM_ERROR
368                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+2=%d).\n",
369                               k, i, narrays, count + 2);
370                         return -EINVAL;
371                 }
372                 break;
373
374         case RADEON_3D_RNDR_GEN_INDX_PRIM:
375                 if (dev_priv->microcode_version != UCODE_R100) {
376                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
377                         return -EINVAL;
378                 }
379
380                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
381                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
382                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
383                                 return -EINVAL;
384                 }
385                 break;
386
387         case RADEON_CP_INDX_BUFFER:
388                 if (dev_priv->microcode_version != UCODE_R200) {
389                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
390                         return -EINVAL;
391                 }
392
393                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
394                 if ((*cmd & 0x8000ffff) != 0x80000810) {
395                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
396                         return -EINVAL;
397                 }
398                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
399                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
400                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
401                         return -EINVAL;
402                 }
403                 break;
404
405         case RADEON_CNTL_HOSTDATA_BLT:
406         case RADEON_CNTL_PAINT_MULTI:
407         case RADEON_CNTL_BITBLT_MULTI:
408                 /* MSB of opcode: next DWORD GUI_CNTL */
409                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
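                /* The pitch/offset dwords pack the offset in 1 KiB units
                 * in their low 22 bits, with the pitch in the high bits;
                 * convert to bytes for checking, then re-pack while
                 * preserving the pitch field.
                 */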
410                 if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
411                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
412                         u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
413                         offset = *cmd2 << 10;
414                         if (radeon_check_and_fixup_offset
415                             (dev_priv, file_priv, &offset)) {
416                                 DRM_ERROR("Invalid first packet offset\n");
417                                 return -EINVAL;
418                         }
419                         *cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
420                 }
421
422                 if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
423                     (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
424                         u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
425                         offset = *cmd3 << 10;
426                         if (radeon_check_and_fixup_offset
427                             (dev_priv, file_priv, &offset)) {
428                                 DRM_ERROR("Invalid second packet offset\n");
429                                 return -EINVAL;
430                         }
431                         *cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
432                 }
433                 break;
434
435         default:
436                 DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
437                 return -EINVAL;
438         }
439
440         return 0;
441 }
442
443 /* ================================================================
444  * CP hardware state programming functions
445  */
446
447 static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
448                                   struct drm_clip_rect * box)
449 {
450         RING_LOCALS;
451
452         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
453                   box->x1, box->y1, box->x2, box->y2);
454
455         BEGIN_RING(4);
456         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
457         OUT_RING((box->y1 << 16) | box->x1);
458         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
459         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
460         ADVANCE_RING();
461 }
462
463 /* Emit 1.1 state
464  */
465 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
466                              struct drm_file *file_priv,
467                              drm_radeon_context_regs_t * ctx,
468                              drm_radeon_texture_regs_t * tex,
469                              unsigned int dirty)
470 {
471         RING_LOCALS;
472         DRM_DEBUG("dirty=0x%08x\n", dirty);
473
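        /* Re-emit each dirty register group as type-0 packets; the colour,
         * depth and texture offsets address GPU memory directly, so they
         * are bounds-checked before being written to the ring.
         */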
474         if (dirty & RADEON_UPLOAD_CONTEXT) {
475                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
476                                                   &ctx->rb3d_depthoffset)) {
477                         DRM_ERROR("Invalid depth buffer offset\n");
478                         return -EINVAL;
479                 }
480
481                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
482                                                   &ctx->rb3d_coloroffset)) {
483                         DRM_ERROR("Invalid colour buffer offset\n");
484                         return -EINVAL;
485                 }
486
487                 BEGIN_RING(14);
488                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
489                 OUT_RING(ctx->pp_misc);
490                 OUT_RING(ctx->pp_fog_color);
491                 OUT_RING(ctx->re_solid_color);
492                 OUT_RING(ctx->rb3d_blendcntl);
493                 OUT_RING(ctx->rb3d_depthoffset);
494                 OUT_RING(ctx->rb3d_depthpitch);
495                 OUT_RING(ctx->rb3d_zstencilcntl);
496                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
497                 OUT_RING(ctx->pp_cntl);
498                 OUT_RING(ctx->rb3d_cntl);
499                 OUT_RING(ctx->rb3d_coloroffset);
500                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
501                 OUT_RING(ctx->rb3d_colorpitch);
502                 ADVANCE_RING();
503         }
504
505         if (dirty & RADEON_UPLOAD_VERTFMT) {
506                 BEGIN_RING(2);
507                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
508                 OUT_RING(ctx->se_coord_fmt);
509                 ADVANCE_RING();
510         }
511
512         if (dirty & RADEON_UPLOAD_LINE) {
513                 BEGIN_RING(5);
514                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
515                 OUT_RING(ctx->re_line_pattern);
516                 OUT_RING(ctx->re_line_state);
517                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
518                 OUT_RING(ctx->se_line_width);
519                 ADVANCE_RING();
520         }
521
522         if (dirty & RADEON_UPLOAD_BUMPMAP) {
523                 BEGIN_RING(5);
524                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
525                 OUT_RING(ctx->pp_lum_matrix);
526                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
527                 OUT_RING(ctx->pp_rot_matrix_0);
528                 OUT_RING(ctx->pp_rot_matrix_1);
529                 ADVANCE_RING();
530         }
531
532         if (dirty & RADEON_UPLOAD_MASKS) {
533                 BEGIN_RING(4);
534                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
535                 OUT_RING(ctx->rb3d_stencilrefmask);
536                 OUT_RING(ctx->rb3d_ropcntl);
537                 OUT_RING(ctx->rb3d_planemask);
538                 ADVANCE_RING();
539         }
540
541         if (dirty & RADEON_UPLOAD_VIEWPORT) {
542                 BEGIN_RING(7);
543                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
544                 OUT_RING(ctx->se_vport_xscale);
545                 OUT_RING(ctx->se_vport_xoffset);
546                 OUT_RING(ctx->se_vport_yscale);
547                 OUT_RING(ctx->se_vport_yoffset);
548                 OUT_RING(ctx->se_vport_zscale);
549                 OUT_RING(ctx->se_vport_zoffset);
550                 ADVANCE_RING();
551         }
552
553         if (dirty & RADEON_UPLOAD_SETUP) {
554                 BEGIN_RING(4);
555                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
556                 OUT_RING(ctx->se_cntl);
557                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
558                 OUT_RING(ctx->se_cntl_status);
559                 ADVANCE_RING();
560         }
561
562         if (dirty & RADEON_UPLOAD_MISC) {
563                 BEGIN_RING(2);
564                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
565                 OUT_RING(ctx->re_misc);
566                 ADVANCE_RING();
567         }
568
569         if (dirty & RADEON_UPLOAD_TEX0) {
570                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
571                                                   &tex[0].pp_txoffset)) {
572                         DRM_ERROR("Invalid texture offset for unit 0\n");
573                         return -EINVAL;
574                 }
575
576                 BEGIN_RING(9);
577                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
578                 OUT_RING(tex[0].pp_txfilter);
579                 OUT_RING(tex[0].pp_txformat);
580                 OUT_RING(tex[0].pp_txoffset);
581                 OUT_RING(tex[0].pp_txcblend);
582                 OUT_RING(tex[0].pp_txablend);
583                 OUT_RING(tex[0].pp_tfactor);
584                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
585                 OUT_RING(tex[0].pp_border_color);
586                 ADVANCE_RING();
587         }
588
589         if (dirty & RADEON_UPLOAD_TEX1) {
590                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
591                                                   &tex[1].pp_txoffset)) {
592                         DRM_ERROR("Invalid texture offset for unit 1\n");
593                         return -EINVAL;
594                 }
595
596                 BEGIN_RING(9);
597                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
598                 OUT_RING(tex[1].pp_txfilter);
599                 OUT_RING(tex[1].pp_txformat);
600                 OUT_RING(tex[1].pp_txoffset);
601                 OUT_RING(tex[1].pp_txcblend);
602                 OUT_RING(tex[1].pp_txablend);
603                 OUT_RING(tex[1].pp_tfactor);
604                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
605                 OUT_RING(tex[1].pp_border_color);
606                 ADVANCE_RING();
607         }
608
609         if (dirty & RADEON_UPLOAD_TEX2) {
610                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
611                                                   &tex[2].pp_txoffset)) {
612                         DRM_ERROR("Invalid texture offset for unit 2\n");
613                         return -EINVAL;
614                 }
615
616                 BEGIN_RING(9);
617                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
618                 OUT_RING(tex[2].pp_txfilter);
619                 OUT_RING(tex[2].pp_txformat);
620                 OUT_RING(tex[2].pp_txoffset);
621                 OUT_RING(tex[2].pp_txcblend);
622                 OUT_RING(tex[2].pp_txablend);
623                 OUT_RING(tex[2].pp_tfactor);
624                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
625                 OUT_RING(tex[2].pp_border_color);
626                 ADVANCE_RING();
627         }
628
629         return 0;
630 }
631
632 /* Emit 1.2 state
633  */
634 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
635                               struct drm_file *file_priv,
636                               drm_radeon_state_t * state)
637 {
638         RING_LOCALS;
639
640         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
641                 BEGIN_RING(3);
642                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
643                 OUT_RING(state->context2.se_zbias_factor);
644                 OUT_RING(state->context2.se_zbias_constant);
645                 ADVANCE_RING();
646         }
647
648         return radeon_emit_state(dev_priv, file_priv, &state->context,
649                                  state->tex, state->dirty);
650 }
651
652 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
653  * 1.3 cmdbuffers allow all previous state to be updated as well as
654  * the tcl scalar and vector areas.
655  */
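/* Register ranges for each 1.3 state packet ID: the first register, the
 * number of consecutive registers the packet writes, and a descriptive
 * name.
 */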
656 static struct {
657         int start;
658         int len;
659         const char *name;
660 } packet[RADEON_MAX_STATE_PACKETS] = {
661         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
662         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
663         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
664         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
665         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
666         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
667         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
668         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
669         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
670         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
671         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
672         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
673         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
674         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
675         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
676         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
677         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
678         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
679         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
680         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
681         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
682                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
683         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
684         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
685         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
686         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
687         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
688         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
689         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
690         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
691         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
692         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
693         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
694         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
695         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
696         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
697         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
698         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
699         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
700         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
701         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
702         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
703         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
704         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
705         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
706         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
707         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
708         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
709         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
710         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
711         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
712          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
713         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
714         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
715         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
716         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
717         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
718         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
719         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
720         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
721         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
722         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
723         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
724                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
725         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
726         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
727         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
728         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
729         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
730         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
731         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
732         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
733         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
734         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
735         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
736         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
737         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
738         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
739         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
740         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
741         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
742         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
743         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
744         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
745         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
746         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
747         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
748         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
749         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
750         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
751         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
752         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
753         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
754         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
755         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
756         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
757         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
758         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
759 };
760
761 /* ================================================================
762  * Performance monitoring functions
763  */
764
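/* Fill a small rectangle, offset from the first clip rect, with a solid
 * colour on the front or back buffer depending on the current page-flip
 * state; used to draw the performance monitoring boxes below.
 */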
765 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
766                              struct drm_radeon_master_private *master_priv,
767                              int x, int y, int w, int h, int r, int g, int b)
768 {
769         u32 color;
770         RING_LOCALS;
771
772         x += master_priv->sarea_priv->boxes[0].x1;
773         y += master_priv->sarea_priv->boxes[0].y1;
774
775         switch (dev_priv->color_fmt) {
776         case RADEON_COLOR_FORMAT_RGB565:
777                 color = (((r & 0xf8) << 8) |
778                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
779                 break;
780         case RADEON_COLOR_FORMAT_ARGB8888:
781         default:
782                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
783                 break;
784         }
785
786         BEGIN_RING(4);
787         RADEON_WAIT_UNTIL_3D_IDLE();
788         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
789         OUT_RING(0xffffffff);
790         ADVANCE_RING();
791
792         BEGIN_RING(6);
793
794         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
795         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
796                  RADEON_GMC_BRUSH_SOLID_COLOR |
797                  (dev_priv->color_fmt << 8) |
798                  RADEON_GMC_SRC_DATATYPE_COLOR |
799                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
800
801         if (master_priv->sarea_priv->pfCurrentPage == 1) {
802                 OUT_RING(dev_priv->front_pitch_offset);
803         } else {
804                 OUT_RING(dev_priv->back_pitch_offset);
805         }
806
807         OUT_RING(color);
808
809         OUT_RING((x << 16) | y);
810         OUT_RING((w << 16) | h);
811
812         ADVANCE_RING();
813 }
814
815 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
816 {
817         /* Collapse various things into a wait flag -- trying to
818          * guess if userspace slept -- better just to have them tell us.
819          */
820         if (dev_priv->stats.last_frame_reads > 1 ||
821             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
822                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
823         }
824
825         if (dev_priv->stats.freelist_loops) {
826                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
827         }
828
829         /* Purple box for page flipping
830          */
831         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
832                 radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
833
834         /* Red box if we have to wait for idle at any point
835          */
836         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
837                 radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
838
839         /* Blue box: lost context?
840          */
841
842         /* Yellow box for texture swaps
843          */
844         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
845                 radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
846
847         /* Green box if hardware never idles (as far as we can tell)
848          */
849         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
850                 radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
851
852         /* Draw bars indicating number of buffers allocated
853          * (not a great measure, easily confused)
854          */
855         if (dev_priv->stats.requested_bufs) {
856                 if (dev_priv->stats.requested_bufs > 100)
857                         dev_priv->stats.requested_bufs = 100;
858
859                 radeon_clear_box(dev_priv, master_priv, 4, 16,
860                                  dev_priv->stats.requested_bufs, 4,
861                                  196, 128, 128);
862         }
863
864         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
865
866 }
867
868 /* ================================================================
869  * CP command dispatch functions
870  */
871
872 static void radeon_cp_dispatch_clear(struct drm_device * dev,
873                                      struct drm_master *master,
874                                      drm_radeon_clear_t * clear,
875                                      drm_radeon_clear_rect_t * depth_boxes)
876 {
877         drm_radeon_private_t *dev_priv = dev->dev_private;
878         struct drm_radeon_master_private *master_priv = master->driver_priv;
879         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
880         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
881         int nbox = sarea_priv->nbox;
882         struct drm_clip_rect *pbox = sarea_priv->boxes;
883         unsigned int flags = clear->flags;
884         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
885         int i;
886         RING_LOCALS;
887         DRM_DEBUG("flags = 0x%x\n", flags);
888
889         dev_priv->stats.clears++;
890
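        /* After a page flip the sarea's idea of front and back is swapped
         * relative to what is actually scanned out, so swap the clear
         * flags to hit the intended buffers.
         */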
891         if (sarea_priv->pfCurrentPage == 1) {
892                 unsigned int tmp = flags;
893
894                 flags &= ~(RADEON_FRONT | RADEON_BACK);
895                 if (tmp & RADEON_FRONT)
896                         flags |= RADEON_BACK;
897                 if (tmp & RADEON_BACK)
898                         flags |= RADEON_FRONT;
899         }
900         if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
901                 if (!dev_priv->have_z_offset) {
902                         printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
903                         flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
904                 }
905         }
906
907         if (flags & (RADEON_FRONT | RADEON_BACK)) {
908
909                 BEGIN_RING(4);
910
911                 /* Ensure the 3D stream is idle before doing a
912                  * 2D fill to clear the front or back buffer.
913                  */
914                 RADEON_WAIT_UNTIL_3D_IDLE();
915
916                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
917                 OUT_RING(clear->color_mask);
918
919                 ADVANCE_RING();
920
921                 /* Make sure we restore the 3D state next time.
922                  */
923                 sarea_priv->ctx_owner = 0;
924
925                 for (i = 0; i < nbox; i++) {
926                         int x = pbox[i].x1;
927                         int y = pbox[i].y1;
928                         int w = pbox[i].x2 - x;
929                         int h = pbox[i].y2 - y;
930
931                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
932                                   x, y, w, h, flags);
933
934                         if (flags & RADEON_FRONT) {
935                                 BEGIN_RING(6);
936
937                                 OUT_RING(CP_PACKET3
938                                          (RADEON_CNTL_PAINT_MULTI, 4));
939                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
940                                          RADEON_GMC_BRUSH_SOLID_COLOR |
941                                          (dev_priv->
942                                           color_fmt << 8) |
943                                          RADEON_GMC_SRC_DATATYPE_COLOR |
944                                          RADEON_ROP3_P |
945                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
946
947                                 OUT_RING(dev_priv->front_pitch_offset);
948                                 OUT_RING(clear->clear_color);
949
950                                 OUT_RING((x << 16) | y);
951                                 OUT_RING((w << 16) | h);
952
953                                 ADVANCE_RING();
954                         }
955
956                         if (flags & RADEON_BACK) {
957                                 BEGIN_RING(6);
958
959                                 OUT_RING(CP_PACKET3
960                                          (RADEON_CNTL_PAINT_MULTI, 4));
961                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
962                                          RADEON_GMC_BRUSH_SOLID_COLOR |
963                                          (dev_priv->
964                                           color_fmt << 8) |
965                                          RADEON_GMC_SRC_DATATYPE_COLOR |
966                                          RADEON_ROP3_P |
967                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
968
969                                 OUT_RING(dev_priv->back_pitch_offset);
970                                 OUT_RING(clear->clear_color);
971
972                                 OUT_RING((x << 16) | y);
973                                 OUT_RING((w << 16) | h);
974
975                                 ADVANCE_RING();
976                         }
977                 }
978         }
979
980         /* hyper z clear */
981         /* no docs available, based on reverse engineering by Stephane Marchesin */
982         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
983             && (flags & RADEON_CLEAR_FASTZ)) {
984
985                 int i;
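                /* Convert the byte pitch to pixels per line (2 bytes per
                 * pixel for 16-bit Z, 4 otherwise) so the tile arithmetic
                 * below works in pixel units.
                 */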
986                 int depthpixperline =
987                     dev_priv->depth_fmt ==
988                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
989                                                        2) : (dev_priv->
990                                                              depth_pitch / 4);
991
992                 u32 clearmask;
993
994                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
995                     ((clear->depth_mask & 0xff) << 24);
996
997                 /* Make sure we restore the 3D state next time.
998                  * we haven't touched any "normal" state - still need this?
999                  */
1000                 sarea_priv->ctx_owner = 0;
1001
1002                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1003                     && (flags & RADEON_USE_HIERZ)) {
1004                         /* FIXME : reverse engineer that for Rx00 cards */
1005                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1006                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1007                            value into account? */
1008                         /* pattern seems to work for r100, though get slight
1009                            rendering errors with glxgears. If hierz is not enabled for r100,
1010                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
1011                            other ones are ignored, and the same clear mask can be used. That's
1012                            very different behaviour than R200 which needs different clear mask
1013                            and different number of tiles to clear if hierz is enabled or not !?!
1014                          */
1015                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
1016                 } else {
1017                         /* clear mask : chooses the clearing pattern.
1018                            rv250: could be used to clear only parts of macrotiles
1019                            (but that would get really complicated...)?
1020                            bit 0 and 1 (either or both of them ?!?!) are used to
1021                            not clear tile (or maybe one of the bits indicates if the tile is
1022                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
1023                            Pattern is as follows:
1024                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1025                            bits -------------------------------------------------
1026                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1027                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1028                            covers 256 pixels ?!?
1029                          */
1030                         clearmask = 0x0;
1031                 }
1032
1033                 BEGIN_RING(8);
1034                 RADEON_WAIT_UNTIL_2D_IDLE();
1035                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1036                              tempRB3D_DEPTHCLEARVALUE);
1037                 /* what offset is this exactly ? */
1038                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1039                 /* need ctlstat, otherwise get some strange black flickering */
1040                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1041                              RADEON_RB3D_ZC_FLUSH_ALL);
1042                 ADVANCE_RING();
1043
1044                 for (i = 0; i < nbox; i++) {
1045                         int tileoffset, nrtilesx, nrtilesy, j;
1046                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1047                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1048                             && !(dev_priv->microcode_version == UCODE_R200)) {
1049                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1050                                    maybe r200 actually doesn't need to put the low-res z value into
1051                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1052                                    Works for R100, both with hierz and without.
1053                                    R100 seems to operate on 2x1 8x8 tiles, but...
1054                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1055                                    problematic with resolutions which are not 64 pix aligned? */
1056                                 tileoffset =
1057                                     ((pbox[i].y1 >> 3) * depthpixperline +
1058                                      pbox[i].x1) >> 6;
1059                                 nrtilesx =
1060                                     ((pbox[i].x2 & ~63) -
1061                                      (pbox[i].x1 & ~63)) >> 4;
1062                                 nrtilesy =
1063                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1064                                 for (j = 0; j <= nrtilesy; j++) {
1065                                         BEGIN_RING(4);
1066                                         OUT_RING(CP_PACKET3
1067                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1068                                         /* first tile */
1069                                         OUT_RING(tileoffset * 8);
1070                                         /* the number of tiles to clear */
1071                                         OUT_RING(nrtilesx + 4);
1072                                         /* clear mask : chooses the clearing pattern. */
1073                                         OUT_RING(clearmask);
1074                                         ADVANCE_RING();
1075                                         tileoffset += depthpixperline >> 6;
1076                                 }
1077                         } else if (dev_priv->microcode_version == UCODE_R200) {
1078                                 /* works for rv250. */
1079                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1080                                 tileoffset =
1081                                     ((pbox[i].y1 >> 3) * depthpixperline +
1082                                      pbox[i].x1) >> 5;
1083                                 nrtilesx =
1084                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1085                                 nrtilesy =
1086                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1087                                 for (j = 0; j <= nrtilesy; j++) {
1088                                         BEGIN_RING(4);
1089                                         OUT_RING(CP_PACKET3
1090                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1091                                         /* first tile */
1092                                         /* judging by the first tile offset needed, could possibly
1093                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1094                                            macro tiles, though would still need clear mask for
1095                                            right/bottom if truly 4x4 granularity is desired ? */
1096                                         OUT_RING(tileoffset * 16);
1097                                         /* the number of tiles to clear */
1098                                         OUT_RING(nrtilesx + 1);
1099                                         /* clear mask : chooses the clearing pattern. */
1100                                         OUT_RING(clearmask);
1101                                         ADVANCE_RING();
1102                                         tileoffset += depthpixperline >> 5;
1103                                 }
1104                         } else {        /* rv 100 */
1105                                 /* rv100 might not need 64 pix alignment, who knows */
1106                                 /* offsets are, hmm, weird */
1107                                 tileoffset =
1108                                     ((pbox[i].y1 >> 4) * depthpixperline +
1109                                      pbox[i].x1) >> 6;
1110                                 nrtilesx =
1111                                     ((pbox[i].x2 & ~63) -
1112                                      (pbox[i].x1 & ~63)) >> 4;
1113                                 nrtilesy =
1114                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1115                                 for (j = 0; j <= nrtilesy; j++) {
1116                                         BEGIN_RING(4);
1117                                         OUT_RING(CP_PACKET3
1118                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1119                                         OUT_RING(tileoffset * 128);
1120                                         /* the number of tiles to clear */
1121                                         OUT_RING(nrtilesx + 4);
1122                                         /* clear mask : chooses the clearing pattern. */
1123                                         OUT_RING(clearmask);
1124                                         ADVANCE_RING();
1125                                         tileoffset += depthpixperline >> 6;
1126                                 }
1127                         }
1128                 }
1129
1130                 /* TODO don't always clear all hi-level z tiles */
1131                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1132                     && (dev_priv->microcode_version == UCODE_R200)
1133                     && (flags & RADEON_USE_HIERZ))
1134                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1135                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1136                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1137                            value into account? */
1138                 {
1139                         BEGIN_RING(4);
1140                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1141                         OUT_RING(0x0);  /* First tile */
1142                         OUT_RING(0x3cc0);
1143                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1144                         ADVANCE_RING();
1145                 }
1146         }
1147
1148         /* We have to clear the depth and/or stencil buffers by
1149          * rendering a quad into just those buffers.  Thus, we have to
1150          * make sure the 3D engine is configured correctly.
1151          */
1152         else if ((dev_priv->microcode_version == UCODE_R200) &&
1153                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1154
1155                 int tempPP_CNTL;
1156                 int tempRE_CNTL;
1157                 int tempRB3D_CNTL;
1158                 int tempRB3D_ZSTENCILCNTL;
1159                 int tempRB3D_STENCILREFMASK;
1160                 int tempRB3D_PLANEMASK;
1161                 int tempSE_CNTL;
1162                 int tempSE_VTE_CNTL;
1163                 int tempSE_VTX_FMT_0;
1164                 int tempSE_VTX_FMT_1;
1165                 int tempSE_VAP_CNTL;
1166                 int tempRE_AUX_SCISSOR_CNTL;
1167
1168                 tempPP_CNTL = 0;
1169                 tempRE_CNTL = 0;
1170
1171                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1172
1173                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1174                 tempRB3D_STENCILREFMASK = 0x0;
1175
1176                 tempSE_CNTL = depth_clear->se_cntl;
1177
1178                 /* Disable TCL */
1179
1180                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1181                                           (0x9 <<
1182                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1183
1184                 tempRB3D_PLANEMASK = 0x0;
1185
1186                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1187
1188                 tempSE_VTE_CNTL =
1189                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1190
1191                 /* Vertex format (X, Y, Z, W) */
1192                 tempSE_VTX_FMT_0 =
1193                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1194                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1195                 tempSE_VTX_FMT_1 = 0x0;
1196
1197                 /*
1198                  * Depth buffer specific enables
1199                  */
1200                 if (flags & RADEON_DEPTH) {
1201                         /* Enable depth buffer */
1202                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1203                 } else {
1204                         /* Disable depth buffer */
1205                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1206                 }
1207
1208                 /*
1209                  * Stencil buffer specific enables
1210                  */
1211                 if (flags & RADEON_STENCIL) {
1212                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1213                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1214                 } else {
1215                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1216                         tempRB3D_STENCILREFMASK = 0x00000000;
1217                 }
1218
1219                 if (flags & RADEON_USE_COMP_ZBUF) {
1220                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1221                             RADEON_Z_DECOMPRESSION_ENABLE;
1222                 }
1223                 if (flags & RADEON_USE_HIERZ) {
1224                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1225                 }
1226
1227                 BEGIN_RING(26);
1228                 RADEON_WAIT_UNTIL_2D_IDLE();
1229
1230                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1231                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1232                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1233                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1234                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1235                              tempRB3D_STENCILREFMASK);
1236                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1237                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1238                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1239                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1240                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1241                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1242                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1243                 ADVANCE_RING();
1244
1245                 /* Make sure we restore the 3D state next time.
1246                  */
1247                 sarea_priv->ctx_owner = 0;
1248
1249                 for (i = 0; i < nbox; i++) {
1250
1251                         /* Funny that this should be required --
1252                          *  sets top-left?
1253                          */
1254                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1255
1256                         BEGIN_RING(14);
1257                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1258                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1259                                   RADEON_PRIM_WALK_RING |
1260                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1261                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1262                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1263                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1264                         OUT_RING(0x3f800000);
1265                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1266                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1267                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1268                         OUT_RING(0x3f800000);
1269                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1270                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1271                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1272                         OUT_RING(0x3f800000);
1273                         ADVANCE_RING();
1274                 }
1275         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1276
1277                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1278
1279                 rb3d_cntl = depth_clear->rb3d_cntl;
1280
1281                 if (flags & RADEON_DEPTH) {
1282                         rb3d_cntl |= RADEON_Z_ENABLE;
1283                 } else {
1284                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1285                 }
1286
1287                 if (flags & RADEON_STENCIL) {
1288                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1289                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1290                 } else {
1291                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1292                         rb3d_stencilrefmask = 0x00000000;
1293                 }
1294
1295                 if (flags & RADEON_USE_COMP_ZBUF) {
1296                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1297                             RADEON_Z_DECOMPRESSION_ENABLE;
1298                 }
1299                 if (flags & RADEON_USE_HIERZ) {
1300                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1301                 }
1302
1303                 BEGIN_RING(13);
1304                 RADEON_WAIT_UNTIL_2D_IDLE();
1305
1306                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1307                 OUT_RING(0x00000000);
1308                 OUT_RING(rb3d_cntl);
1309
1310                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1311                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1312                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1313                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1314                 ADVANCE_RING();
1315
1316                 /* Make sure we restore the 3D state next time.
1317                  */
1318                 sarea_priv->ctx_owner = 0;
1319
1320                 for (i = 0; i < nbox; i++) {
1321
1322                         /* Funny that this should be required --
1323                          *  sets top-left?
1324                          */
1325                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1326
1327                         BEGIN_RING(15);
1328
1329                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1330                         OUT_RING(RADEON_VTX_Z_PRESENT |
1331                                  RADEON_VTX_PKCOLOR_PRESENT);
1332                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1333                                   RADEON_PRIM_WALK_RING |
1334                                   RADEON_MAOS_ENABLE |
1335                                   RADEON_VTX_FMT_RADEON_MODE |
1336                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1337
1338                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1339                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1340                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1341                         OUT_RING(0x0);
1342
1343                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1344                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1345                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1346                         OUT_RING(0x0);
1347
1348                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1349                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1350                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1351                         OUT_RING(0x0);
1352
1353                         ADVANCE_RING();
1354                 }
1355         }
1356
1357         /* Increment the clear counter.  The client-side 3D driver must
1358          * wait on this value before performing the clear ioctl.  We
1359          * need this because the card's so damned fast...
1360          */
1361         sarea_priv->last_clear++;
1362
1363         BEGIN_RING(4);
1364
1365         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1366         RADEON_WAIT_UNTIL_IDLE();
1367
1368         ADVANCE_RING();
1369 }
1370
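     /* Copy the contents of the rendered (back) buffer to the displayed
      * (front) buffer for every cliprect in the SAREA, i.e. perform the
      * "swap" by blitting rather than by page flipping.  Source and
      * destination are exchanged when the pages are currently flipped.
      */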
1371 static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1372 {
1373         drm_radeon_private_t *dev_priv = dev->dev_private;
1374         struct drm_radeon_master_private *master_priv = master->driver_priv;
1375         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1376         int nbox = sarea_priv->nbox;
1377         struct drm_clip_rect *pbox = sarea_priv->boxes;
1378         int i;
1379         RING_LOCALS;
1380         DRM_DEBUG("\n");
1381
1382         /* Do some trivial performance monitoring...
1383          */
1384         if (dev_priv->do_boxes)
1385                 radeon_cp_performance_boxes(dev_priv, master_priv);
1386
1387         /* Wait for the 3D stream to idle before dispatching the bitblt.
1388          * This will prevent data corruption between the two streams.
1389          */
1390         BEGIN_RING(2);
1391
1392         RADEON_WAIT_UNTIL_3D_IDLE();
1393
1394         ADVANCE_RING();
1395
1396         for (i = 0; i < nbox; i++) {
1397                 int x = pbox[i].x1;
1398                 int y = pbox[i].y1;
1399                 int w = pbox[i].x2 - x;
1400                 int h = pbox[i].y2 - y;
1401
1402                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1403
1404                 BEGIN_RING(9);
1405
1406                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1407                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1408                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1409                          RADEON_GMC_BRUSH_NONE |
1410                          (dev_priv->color_fmt << 8) |
1411                          RADEON_GMC_SRC_DATATYPE_COLOR |
1412                          RADEON_ROP3_S |
1413                          RADEON_DP_SRC_SOURCE_MEMORY |
1414                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1415
1416                 /* Make this work even if front & back are flipped:
1417                  */
1418                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1419                 if (sarea_priv->pfCurrentPage == 0) {
1420                         OUT_RING(dev_priv->back_pitch_offset);
1421                         OUT_RING(dev_priv->front_pitch_offset);
1422                 } else {
1423                         OUT_RING(dev_priv->front_pitch_offset);
1424                         OUT_RING(dev_priv->back_pitch_offset);
1425                 }
1426
1427                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1428                 OUT_RING((x << 16) | y);
1429                 OUT_RING((x << 16) | y);
1430                 OUT_RING((w << 16) | h);
1431
1432                 ADVANCE_RING();
1433         }
1434
1435         /* Increment the frame counter.  The client-side 3D driver must
1436          * throttle the framerate by waiting for this value before
1437          * performing the swapbuffer ioctl.
1438          */
1439         sarea_priv->last_frame++;
1440
1441         BEGIN_RING(4);
1442
1443         RADEON_FRAME_AGE(sarea_priv->last_frame);
1444         RADEON_WAIT_UNTIL_2D_IDLE();
1445
1446         ADVANCE_RING();
1447 }
1448
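     /* Point both CRTCs at the buffer that was just rendered, toggle
      * pfCurrentPage, and emit a new frame age so the client-side driver
      * can throttle its frame rate.
      */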
1449 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1450 {
1451         drm_radeon_private_t *dev_priv = dev->dev_private;
1452         struct drm_radeon_master_private *master_priv = master->driver_priv;
1453         struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1454         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1455             ? dev_priv->front_offset : dev_priv->back_offset;
1456         RING_LOCALS;
1457         DRM_DEBUG("pfCurrentPage=%d\n",
1458                   master_priv->sarea_priv->pfCurrentPage);
1459
1460         /* Do some trivial performance monitoring...
1461          */
1462         if (dev_priv->do_boxes) {
1463                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1464                 radeon_cp_performance_boxes(dev_priv, master_priv);
1465         }
1466
1467         /* Update the frame offsets for both CRTCs
1468          */
1469         BEGIN_RING(6);
1470
1471         RADEON_WAIT_UNTIL_3D_IDLE();
1472         OUT_RING_REG(RADEON_CRTC_OFFSET,
1473                      ((sarea->frame.y * dev_priv->front_pitch +
1474                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1475                      + offset);
1476         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1477                      + offset);
1478
1479         ADVANCE_RING();
1480
1481         /* Increment the frame counter.  The client-side 3D driver must
1482          * throttle the framerate by waiting for this value before
1483          * performing the swapbuffer ioctl.
1484          */
1485         master_priv->sarea_priv->last_frame++;
1486         master_priv->sarea_priv->pfCurrentPage =
1487                 1 - master_priv->sarea_priv->pfCurrentPage;
1488
1489         BEGIN_RING(2);
1490
1491         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1492
1493         ADVANCE_RING();
1494 }
1495
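     /* Sanity check the vertex count for a given primitive type.  Returns
      * nonzero if the count is invalid, e.g. a line list needs a nonzero
      * even count and a triangle/rect list needs a nonzero multiple of
      * three.
      */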
1496 static int bad_prim_vertex_nr(int primitive, int nr)
1497 {
1498         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1499         case RADEON_PRIM_TYPE_NONE:
1500         case RADEON_PRIM_TYPE_POINT:
1501                 return nr < 1;
1502         case RADEON_PRIM_TYPE_LINE:
1503                 return (nr & 1) || nr == 0;
1504         case RADEON_PRIM_TYPE_LINE_STRIP:
1505                 return nr < 2;
1506         case RADEON_PRIM_TYPE_TRI_LIST:
1507         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1508         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1509         case RADEON_PRIM_TYPE_RECT_LIST:
1510                 return nr % 3 || nr == 0;
1511         case RADEON_PRIM_TYPE_TRI_FAN:
1512         case RADEON_PRIM_TYPE_TRI_STRIP:
1513                 return nr < 3;
1514         default:
1515                 return 1;
1516         }
1517 }
1518
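     /* Description of one TCL primitive taken from a client vertex or
      * index buffer: the byte range within the buffer, primitive type,
      * vertex count, buffer offset and vertex format.
      */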
1519 typedef struct {
1520         unsigned int start;
1521         unsigned int finish;
1522         unsigned int prim;
1523         unsigned int numverts;
1524         unsigned int offset;
1525         unsigned int vc_format;
1526 } drm_radeon_tcl_prim_t;
1527
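     /* Emit the rendering commands for a client vertex buffer, replaying
      * the same primitive once for each cliprect currently in the SAREA.
      */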
1528 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1529                                       struct drm_file *file_priv,
1530                                       struct drm_buf * buf,
1531                                       drm_radeon_tcl_prim_t * prim)
1532 {
1533         drm_radeon_private_t *dev_priv = dev->dev_private;
1534         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1535         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1536         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1537         int numverts = (int)prim->numverts;
1538         int nbox = sarea_priv->nbox;
1539         int i = 0;
1540         RING_LOCALS;
1541
1542         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1543                   prim->prim,
1544                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1545
1546         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1547                 DRM_ERROR("bad prim %x numverts %d\n",
1548                           prim->prim, prim->numverts);
1549                 return;
1550         }
1551
1552         do {
1553                 /* Emit the next cliprect */
1554                 if (i < nbox) {
1555                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1556                 }
1557
1558                 /* Emit the vertex buffer rendering commands */
1559                 BEGIN_RING(5);
1560
1561                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1562                 OUT_RING(offset);
1563                 OUT_RING(numverts);
1564                 OUT_RING(prim->vc_format);
1565                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1566                          RADEON_COLOR_ORDER_RGBA |
1567                          RADEON_VTX_FMT_RADEON_MODE |
1568                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1569
1570                 ADVANCE_RING();
1571
1572                 i++;
1573         } while (i < nbox);
1574 }
1575
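     /* Return a DMA buffer to the hardware: emit its dispatch age so the
      * freelist can reclaim it once the CP has processed everything queued
      * so far, then mark the buffer as pending.
      */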
1576 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1577 {
1578         drm_radeon_private_t *dev_priv = dev->dev_private;
1579         struct drm_radeon_master_private *master_priv = master->driver_priv;
1580         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1581         RING_LOCALS;
1582
1583         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1584
1585         /* Emit the vertex buffer age */
1586         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1587                 BEGIN_RING(3);
1588                 R600_DISPATCH_AGE(buf_priv->age);
1589                 ADVANCE_RING();
1590         } else {
1591                 BEGIN_RING(2);
1592                 RADEON_DISPATCH_AGE(buf_priv->age);
1593                 ADVANCE_RING();
1594         }
1595
1596         buf->pending = 1;
1597         buf->used = 0;
1598 }
1599
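     /* Fire off the [start, end) byte range of a buffer as a CP indirect
      * buffer.  The range is padded to an even number of dwords with a
      * Type-2 packet if necessary.
      */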
1600 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1601                                         struct drm_buf * buf, int start, int end)
1602 {
1603         drm_radeon_private_t *dev_priv = dev->dev_private;
1604         RING_LOCALS;
1605         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1606
1607         if (start != end) {
1608                 int offset = (dev_priv->gart_buffers_offset
1609                               + buf->offset + start);
1610                 int dwords = (end - start + 3) / sizeof(u32);
1611
1612                 /* Indirect buffer data must be an even number of
1613                  * dwords, so if we've been given an odd number we must
1614                  * pad the data with a Type-2 CP packet.
1615                  */
1616                 if (dwords & 1) {
1617                         u32 *data = (u32 *)
1618                             ((char *)dev->agp_buffer_map->handle
1619                              + buf->offset + start);
1620                         data[dwords++] = RADEON_CP_PACKET2;
1621                 }
1622
1623                 /* Fire off the indirect buffer */
1624                 BEGIN_RING(3);
1625
1626                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1627                 OUT_RING(offset);
1628                 OUT_RING(dwords);
1629
1630                 ADVANCE_RING();
1631         }
1632 }
1633
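     /* Dispatch an indexed primitive: the packet header is written into
      * the element buffer itself and the buffer is then fired off as an
      * indirect buffer once per cliprect.
      */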
1634 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1635                                        struct drm_master *master,
1636                                        struct drm_buf * elt_buf,
1637                                        drm_radeon_tcl_prim_t * prim)
1638 {
1639         drm_radeon_private_t *dev_priv = dev->dev_private;
1640         struct drm_radeon_master_private *master_priv = master->driver_priv;
1641         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1642         int offset = dev_priv->gart_buffers_offset + prim->offset;
1643         u32 *data;
1644         int dwords;
1645         int i = 0;
1646         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1647         int count = (prim->finish - start) / sizeof(u16);
1648         int nbox = sarea_priv->nbox;
1649
1650         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1651                   prim->prim,
1652                   prim->vc_format,
1653                   prim->start, prim->finish, prim->offset, prim->numverts);
1654
1655         if (bad_prim_vertex_nr(prim->prim, count)) {
1656                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1657                 return;
1658         }
1659
1660         if (start >= prim->finish || (prim->start & 0x7)) {
1661                 DRM_ERROR("buffer prim %d\n", prim->prim);
1662                 return;
1663         }
1664
1665         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1666
1667         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1668                         elt_buf->offset + prim->start);
1669
1670         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1671         data[1] = offset;
1672         data[2] = prim->numverts;
1673         data[3] = prim->vc_format;
1674         data[4] = (prim->prim |
1675                    RADEON_PRIM_WALK_IND |
1676                    RADEON_COLOR_ORDER_RGBA |
1677                    RADEON_VTX_FMT_RADEON_MODE |
1678                    (count << RADEON_NUM_VERTICES_SHIFT));
1679
1680         do {
1681                 if (i < nbox)
1682                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1683
1684                 radeon_cp_dispatch_indirect(dev, elt_buf,
1685                                             prim->start, prim->finish);
1686
1687                 i++;
1688         } while (i < nbox);
1689
1690 }
1691
1692 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1693
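     /* Upload a texture image into card memory: the data is staged through
      * GART DMA buffers and blitted to the destination offset, with large
      * images split across multiple passes of at most
      * RADEON_MAX_TEXTURE_SIZE bytes each.
      */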
1694 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1695                                       struct drm_file *file_priv,
1696                                       drm_radeon_texture_t * tex,
1697                                       drm_radeon_tex_image_t * image)
1698 {
1699         drm_radeon_private_t *dev_priv = dev->dev_private;
1700         struct drm_buf *buf;
1701         u32 format;
1702         u32 *buffer;
1703         const u8 __user *data;
1704         int size, dwords, tex_width, blit_width, spitch;
1705         u32 height;
1706         int i;
1707         u32 texpitch, microtile;
1708         u32 offset, byte_offset;
1709         RING_LOCALS;
1710
1711         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1712                 DRM_ERROR("Invalid destination offset\n");
1713                 return -EINVAL;
1714         }
1715
1716         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1717
1718         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1719          * up with the texture data from the host data blit, otherwise
1720          * part of the texture image may be corrupted.
1721          */
1722         BEGIN_RING(4);
1723         RADEON_FLUSH_CACHE();
1724         RADEON_WAIT_UNTIL_IDLE();
1725         ADVANCE_RING();
1726
1727         /* The compiler won't optimize away a division by a variable,
1728          * even if the only legal values are powers of two.  Thus, we'll
1729          * use a shift instead.
1730          */
1731         switch (tex->format) {
1732         case RADEON_TXFORMAT_ARGB8888:
1733         case RADEON_TXFORMAT_RGBA8888:
1734                 format = RADEON_COLOR_FORMAT_ARGB8888;
1735                 tex_width = tex->width * 4;
1736                 blit_width = image->width * 4;
1737                 break;
1738         case RADEON_TXFORMAT_AI88:
1739         case RADEON_TXFORMAT_ARGB1555:
1740         case RADEON_TXFORMAT_RGB565:
1741         case RADEON_TXFORMAT_ARGB4444:
1742         case RADEON_TXFORMAT_VYUY422:
1743         case RADEON_TXFORMAT_YVYU422:
1744                 format = RADEON_COLOR_FORMAT_RGB565;
1745                 tex_width = tex->width * 2;
1746                 blit_width = image->width * 2;
1747                 break;
1748         case RADEON_TXFORMAT_I8:
1749         case RADEON_TXFORMAT_RGB332:
1750                 format = RADEON_COLOR_FORMAT_CI8;
1751                 tex_width = tex->width * 1;
1752                 blit_width = image->width * 1;
1753                 break;
1754         default:
1755                 DRM_ERROR("invalid texture format %d\n", tex->format);
1756                 return -EINVAL;
1757         }
1758         spitch = blit_width >> 6;
1759         if (spitch == 0 && image->height > 1)
1760                 return -EINVAL;
1761
1762         texpitch = tex->pitch;
1763         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1764                 microtile = 1;
1765                 if (tex_width < 64) {
1766                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1767                         /* we got tiled coordinates, untile them */
1768                         image->x *= 2;
1769                 }
1770         } else
1771                 microtile = 0;
1772
1773         /* this might fail for zero-sized uploads - are those illegal? */
1774         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1775                                 blit_width - 1)) {
1776                 DRM_ERROR("Invalid final destination offset\n");
1777                 return -EINVAL;
1778         }
1779
1780         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1781
1782         do {
1783                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1784                           tex->offset >> 10, tex->pitch, tex->format,
1785                           image->x, image->y, image->width, image->height);
1786
1787                 /* Make a copy of some parameters in case we have to
1788                  * update them for a multi-pass texture blit.
1789                  */
1790                 height = image->height;
1791                 data = (const u8 __user *)image->data;
1792
1793                 size = height * blit_width;
1794
1795                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1796                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1797                         size = height * blit_width;
1798                 } else if (size < 4 && size > 0) {
1799                         size = 4;
1800                 } else if (size == 0) {
1801                         return 0;
1802                 }
1803
1804                 buf = radeon_freelist_get(dev);
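                     /* Note: the idle-and-retry path below is compiled out
                      * ("0 &&"); a missing buffer falls straight through to
                      * the -EAGAIN handling.
                      */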
1805                 if (0 && !buf) {
1806                         radeon_do_cp_idle(dev_priv);
1807                         buf = radeon_freelist_get(dev);
1808                 }
1809                 if (!buf) {
1810                         DRM_DEBUG("EAGAIN\n");
1811                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1812                                 return -EFAULT;
1813                         return -EAGAIN;
1814                 }
1815
1816                 /* Dispatch the indirect buffer.
1817                  */
1818                 buffer =
1819                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1820                 dwords = size / 4;
1821
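     /* Copy _width bytes of texture data from user space into the staging
      * buffer, failing the ioctl with -EFAULT if the copy faults.
      */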
1822 #define RADEON_COPY_MT(_buf, _data, _width) \
1823         do { \
1824                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1825                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1826                         return -EFAULT; \
1827                 } \
1828         } while(0)
1829
1830                 if (microtile) {
1831                         /* Texture micro tiling is in use, so the minimum texture width is 16 bytes.
1832                            However, we cannot use the blitter directly for texture widths below 64 bytes,
1833                            since the minimum texture pitch is 64 bytes and it must match the
1834                            texture width, otherwise the blitter will tile it incorrectly.
1835                            Thus we tile manually in that case.  Additionally, we special-case a
1836                            texture height of 1, since our actual image will have height 2
1837                            and we need to ensure we don't read beyond the texture size
1838                            from user space. */
1839                         if (tex->height == 1) {
1840                                 if (tex_width >= 64 || tex_width <= 16) {
1841                                         RADEON_COPY_MT(buffer, data,
1842                                                 (int)(tex_width * sizeof(u32)));
1843                                 } else if (tex_width == 32) {
1844                                         RADEON_COPY_MT(buffer, data, 16);
1845                                         RADEON_COPY_MT(buffer + 8,
1846                                                        data + 16, 16);
1847                                 }
1848                         } else if (tex_width >= 64 || tex_width == 16) {
1849                                 RADEON_COPY_MT(buffer, data,
1850                                                (int)(dwords * sizeof(u32)));
1851                         } else if (tex_width < 16) {
1852                                 for (i = 0; i < tex->height; i++) {
1853                                         RADEON_COPY_MT(buffer, data, tex_width);
1854                                         buffer += 4;
1855                                         data += tex_width;
1856                                 }
1857                         } else if (tex_width == 32) {
1858                                 /* TODO: make sure this works when not fitting in one buffer
1859                                    (i.e. 32bytes x 2048...) */
1860                                 for (i = 0; i < tex->height; i += 2) {
1861                                         RADEON_COPY_MT(buffer, data, 16);
1862                                         data += 16;
1863                                         RADEON_COPY_MT(buffer + 8, data, 16);
1864                                         data += 16;
1865                                         RADEON_COPY_MT(buffer + 4, data, 16);
1866                                         data += 16;
1867                                         RADEON_COPY_MT(buffer + 12, data, 16);
1868                                         data += 16;
1869                                         buffer += 16;
1870                                 }
1871                         }
1872                 } else {
1873                         if (tex_width >= 32) {
1874                                 /* Texture image width is larger than the minimum, so we
1875                                  * can upload it directly.
1876                                  */
1877                                 RADEON_COPY_MT(buffer, data,
1878                                                (int)(dwords * sizeof(u32)));
1879                         } else {
1880                                 /* Texture image width is less than the minimum, so we
1881                                  * need to pad out each image scanline to the minimum
1882                                  * width.
1883                                  */
1884                                 for (i = 0; i < tex->height; i++) {
1885                                         RADEON_COPY_MT(buffer, data, tex_width);
1886                                         buffer += 8;
1887                                         data += tex_width;
1888                                 }
1889                         }
1890                 }
1891
1892 #undef RADEON_COPY_MT
1893                 byte_offset = (image->y & ~2047) * blit_width;
1894                 buf->file_priv = file_priv;
1895                 buf->used = size;
1896                 offset = dev_priv->gart_buffers_offset + buf->offset;
1897                 BEGIN_RING(9);
1898                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1899                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1900                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1901                          RADEON_GMC_BRUSH_NONE |
1902                          (format << 8) |
1903                          RADEON_GMC_SRC_DATATYPE_COLOR |
1904                          RADEON_ROP3_S |
1905                          RADEON_DP_SRC_SOURCE_MEMORY |
1906                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1907                 OUT_RING((spitch << 22) | (offset >> 10));
1908                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1909                 OUT_RING(0);
1910                 OUT_RING((image->x << 16) | (image->y % 2048));
1911                 OUT_RING((image->width << 16) | height);
1912                 RADEON_WAIT_UNTIL_2D_IDLE();
1913                 ADVANCE_RING();
1914                 COMMIT_RING();
1915
1916                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
1917
1918                 /* Update the input parameters for next time */
1919                 image->y += height;
1920                 image->height -= height;
1921                 image->data = (const u8 __user *)image->data + size;
1922         } while (image->height > 0);
1923
1924         /* Flush the pixel cache after the blit completes.  This ensures
1925          * the texture data is written out to memory before rendering
1926          * continues.
1927          */
1928         BEGIN_RING(4);
1929         RADEON_FLUSH_CACHE();
1930         RADEON_WAIT_UNTIL_2D_IDLE();
1931         ADVANCE_RING();
1932         COMMIT_RING();
1933
1934         return 0;
1935 }
1936
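     /* Upload the 32x32 polygon stipple pattern: one write to reset
      * RADEON_RE_STIPPLE_ADDR followed by 32 dwords of pattern data.
      */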
1937 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1938 {
1939         drm_radeon_private_t *dev_priv = dev->dev_private;
1940         int i;
1941         RING_LOCALS;
1942         DRM_DEBUG("\n");
1943
1944         BEGIN_RING(35);
1945
1946         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1947         OUT_RING(0x00000000);
1948
1949         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1950         for (i = 0; i < 32; i++) {
1951                 OUT_RING(stipple[i]);
1952         }
1953
1954         ADVANCE_RING();
1955 }
1956
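     /* Write the flags and bounds of one surface register set out to the
      * hardware.  The CP is idled first because the registers are written
      * through MMIO rather than through the ring.
      */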
1957 static void radeon_apply_surface_regs(int surf_index,
1958                                       drm_radeon_private_t *dev_priv)
1959 {
1960         if (!dev_priv->mmio)
1961                 return;
1962
1963         radeon_do_cp_idle(dev_priv);
1964
1965         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1966                      dev_priv->surfaces[surf_index].flags);
1967         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1968                      dev_priv->surfaces[surf_index].lower);
1969         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1970                      dev_priv->surfaces[surf_index].upper);
1971 }
1972
1973 /* Allocates a virtual surface.
1974  * This doesn't always allocate a real surface; it will stretch an
1975  * existing surface when possible.
1976  *
1977  * Note that the refcount can be at most 2, since allowing a refcount of
1978  * 3 would mean that freeing the middle range might require allocating a
1979  * new surface, which might not always be available.
1980  * For example: we allocate three contiguous surfaces A, B and C.  If B
1981  * is freed, we suddenly need two surfaces to store A and C, which might
1982  * not always be available.
1983  */
1984 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1985                          drm_radeon_private_t *dev_priv,
1986                          struct drm_file *file_priv)
1987 {
1988         struct radeon_virt_surface *s;
1989         int i;
1990         int virt_surface_index;
1991         uint32_t new_upper, new_lower;
1992
1993         new_lower = new->address;
1994         new_upper = new_lower + new->size - 1;
1995
1996         /* sanity check */
1997         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1998             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1999              RADEON_SURF_ADDRESS_FIXED_MASK)
2000             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2001                 return -1;
2002
2003         /* make sure there is no overlap with existing surfaces */
2004         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2005                 if ((dev_priv->surfaces[i].refcount != 0) &&
2006                     (((new_lower >= dev_priv->surfaces[i].lower) &&
2007                       (new_lower < dev_priv->surfaces[i].upper)) ||
2008                      ((new_lower < dev_priv->surfaces[i].lower) &&
2009                       (new_upper > dev_priv->surfaces[i].lower)))) {
2010                         return -1;
2011                 }
2012         }
2013
2014         /* find a virtual surface */
2015         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2016                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
2017                         break;
2018         if (i == 2 * RADEON_MAX_SURFACES) {
2019                 return -1;
2020         }
2021         virt_surface_index = i;
2022
2023         /* try to reuse an existing surface */
2024         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2025                 /* extend before */
2026                 if ((dev_priv->surfaces[i].refcount == 1) &&
2027                     (new->flags == dev_priv->surfaces[i].flags) &&
2028                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2029                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2030                         s->surface_index = i;
2031                         s->lower = new_lower;
2032                         s->upper = new_upper;
2033                         s->flags = new->flags;
2034                         s->file_priv = file_priv;
2035                         dev_priv->surfaces[i].refcount++;
2036                         dev_priv->surfaces[i].lower = s->lower;
2037                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2038                         return virt_surface_index;
2039                 }
2040
2041                 /* extend after */
2042                 if ((dev_priv->surfaces[i].refcount == 1) &&
2043                     (new->flags == dev_priv->surfaces[i].flags) &&
2044                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2045                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2046                         s->surface_index = i;
2047                         s->lower = new_lower;
2048                         s->upper = new_upper;
2049                         s->flags = new->flags;
2050                         s->file_priv = file_priv;
2051                         dev_priv->surfaces[i].refcount++;
2052                         dev_priv->surfaces[i].upper = s->upper;
2053                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2054                         return virt_surface_index;
2055                 }
2056         }
2057
2058         /* okay, we need a new one */
2059         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2060                 if (dev_priv->surfaces[i].refcount == 0) {
2061                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2062                         s->surface_index = i;
2063                         s->lower = new_lower;
2064                         s->upper = new_upper;
2065                         s->flags = new->flags;
2066                         s->file_priv = file_priv;
2067                         dev_priv->surfaces[i].refcount = 1;
2068                         dev_priv->surfaces[i].lower = s->lower;
2069                         dev_priv->surfaces[i].upper = s->upper;
2070                         dev_priv->surfaces[i].flags = s->flags;
2071                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2072                         return virt_surface_index;
2073                 }
2074         }
2075
2076         /* we didn't find anything */
2077         return -1;
2078 }
2079
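     /* Release the virtual surface owned by file_priv that starts at the
      * given lower bound, shrinking or freeing the underlying real surface
      * as appropriate.  Returns 0 on success, 1 if no matching surface was
      * found.
      */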
2080 static int free_surface(struct drm_file *file_priv,
2081                         drm_radeon_private_t * dev_priv,
2082                         int lower)
2083 {
2084         struct radeon_virt_surface *s;
2085         int i;
2086         /* find the virtual surface */
2087         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2088                 s = &(dev_priv->virt_surfaces[i]);
2089                 if (s->file_priv) {
2090                         if ((lower == s->lower) && (file_priv == s->file_priv))
2091                         {
2092                                 if (dev_priv->surfaces[s->surface_index].
2093                                     lower == s->lower)
2094                                         dev_priv->surfaces[s->surface_index].
2095                                             lower = s->upper;
2096
2097                                 if (dev_priv->surfaces[s->surface_index].
2098                                     upper == s->upper)
2099                                         dev_priv->surfaces[s->surface_index].
2100                                             upper = s->lower;
2101
2102                                 dev_priv->surfaces[s->surface_index].refcount--;
2103                                 if (dev_priv->surfaces[s->surface_index].
2104                                     refcount == 0)
2105                                         dev_priv->surfaces[s->surface_index].
2106                                             flags = 0;
2107                                 s->file_priv = NULL;
2108                                 radeon_apply_surface_regs(s->surface_index,
2109                                                           dev_priv);
2110                                 return 0;
2111                         }
2112                 }
2113         }
2114         return 1;
2115 }
2116
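     /* Free every virtual surface still owned by the given file. */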
2117 static void radeon_surfaces_release(struct drm_file *file_priv,
2118                                     drm_radeon_private_t * dev_priv)
2119 {
2120         int i;
2121         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2122                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2123                         free_surface(file_priv, dev_priv,
2124                                      dev_priv->virt_surfaces[i].lower);
2125         }
2126 }
2127
2128 /* ================================================================
2129  * IOCTL functions
2130  */
2131 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2132 {
2133         drm_radeon_private_t *dev_priv = dev->dev_private;
2134         drm_radeon_surface_alloc_t *alloc = data;
2135
2136         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2137                 return -EINVAL;
2138         else
2139                 return 0;
2140 }
2141
2142 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2143 {
2144         drm_radeon_private_t *dev_priv = dev->dev_private;
2145         drm_radeon_surface_free_t *memfree = data;
2146
2147         if (free_surface(file_priv, dev_priv, memfree->address))
2148                 return -EINVAL;
2149         else
2150                 return 0;
2151 }
2152
2153 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2154 {
2155         drm_radeon_private_t *dev_priv = dev->dev_private;
2156         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2157         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2158         drm_radeon_clear_t *clear = data;
2159         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2160         DRM_DEBUG("\n");
2161
2162         LOCK_TEST_WITH_RETURN(dev, file_priv);
2163
2164         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2165
2166         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2167                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2168
2169         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2170                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2171                 return -EFAULT;
2172
2173         radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
2174
2175         COMMIT_RING();
2176         return 0;
2177 }
2178
2179 /* Not sure why this isn't set all the time:
2180  */
2181 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2182 {
2183         drm_radeon_private_t *dev_priv = dev->dev_private;
2184         struct drm_radeon_master_private *master_priv = master->driver_priv;
2185         RING_LOCALS;
2186
2187         DRM_DEBUG("\n");
2188
2189         BEGIN_RING(6);
2190         RADEON_WAIT_UNTIL_3D_IDLE();
2191         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2192         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2193                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2194         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2195         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2196                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2197         ADVANCE_RING();
2198
2199         dev_priv->page_flipping = 1;
2200
2201         if (master_priv->sarea_priv->pfCurrentPage != 1)
2202                 master_priv->sarea_priv->pfCurrentPage = 0;
2203
2204         return 0;
2205 }
2206
2207 /* Swapping and flipping are different operations and need different ioctls.
2208  * They can and should be intermixed to support multiple 3D windows.
2209  */
2210 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2211 {
2212         drm_radeon_private_t *dev_priv = dev->dev_private;
2213         DRM_DEBUG("\n");
2214
2215         LOCK_TEST_WITH_RETURN(dev, file_priv);
2216
2217         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2218
2219         if (!dev_priv->page_flipping)
2220                 radeon_do_init_pageflip(dev, file_priv->master);
2221
2222         radeon_cp_dispatch_flip(dev, file_priv->master);
2223
2224         COMMIT_RING();
2225         return 0;
2226 }
2227
2228 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2229 {
2230         drm_radeon_private_t *dev_priv = dev->dev_private;
2231         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2232         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2233
2234         DRM_DEBUG("\n");
2235
2236         LOCK_TEST_WITH_RETURN(dev, file_priv);
2237
2238         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2239
2240         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2241                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2242
2243         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2244                 r600_cp_dispatch_swap(dev, file_priv);
2245         else
2246                 radeon_cp_dispatch_swap(dev, file_priv->master);
2247         sarea_priv->ctx_owner = 0;
2248
2249         COMMIT_RING();
2250         return 0;
2251 }
2252
2253 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2254 {
2255         drm_radeon_private_t *dev_priv = dev->dev_private;
2256         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2257         drm_radeon_sarea_t *sarea_priv;
2258         struct drm_device_dma *dma = dev->dma;
2259         struct drm_buf *buf;
2260         drm_radeon_vertex_t *vertex = data;
2261         drm_radeon_tcl_prim_t prim;
2262
2263         LOCK_TEST_WITH_RETURN(dev, file_priv);
2264
2265         sarea_priv = master_priv->sarea_priv;
2266
2267         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2268                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2269
2270         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2271                 DRM_ERROR("buffer index %d (of %d max)\n",
2272                           vertex->idx, dma->buf_count - 1);
2273                 return -EINVAL;
2274         }
2275         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2276                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2277                 return -EINVAL;
2278         }
2279
2280         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2281         VB_AGE_TEST_WITH_RETURN(dev_priv);
2282
2283         buf = dma->buflist[vertex->idx];
2284
2285         if (buf->file_priv != file_priv) {
2286                 DRM_ERROR("process %d using buffer owned by %p\n",
2287                           DRM_CURRENTPID, buf->file_priv);
2288                 return -EINVAL;
2289         }
2290         if (buf->pending) {
2291                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2292                 return -EINVAL;
2293         }
2294
2295         /* Build up a prim_t record:
2296          */
2297         if (vertex->count) {
2298                 buf->used = vertex->count;      /* not used? */
2299
2300                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2301                         if (radeon_emit_state(dev_priv, file_priv,
2302                                               &sarea_priv->context_state,
2303                                               sarea_priv->tex_state,
2304                                               sarea_priv->dirty)) {
2305                                 DRM_ERROR("radeon_emit_state failed\n");
2306                                 return -EINVAL;
2307                         }
2308
2309                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2310                                                RADEON_UPLOAD_TEX1IMAGES |
2311                                                RADEON_UPLOAD_TEX2IMAGES |
2312                                                RADEON_REQUIRE_QUIESCENCE);
2313                 }
2314
2315                 prim.start = 0;
2316                 prim.finish = vertex->count;    /* unused */
2317                 prim.prim = vertex->prim;
2318                 prim.numverts = vertex->count;
2319                 prim.vc_format = sarea_priv->vc_format;
2320
2321                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2322         }
2323
2324         if (vertex->discard) {
2325                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2326         }
2327
2328         COMMIT_RING();
2329         return 0;
2330 }
2331
2332 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2333 {
2334         drm_radeon_private_t *dev_priv = dev->dev_private;
2335         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2336         drm_radeon_sarea_t *sarea_priv;
2337         struct drm_device_dma *dma = dev->dma;
2338         struct drm_buf *buf;
2339         drm_radeon_indices_t *elts = data;
2340         drm_radeon_tcl_prim_t prim;
2341         int count;
2342
2343         LOCK_TEST_WITH_RETURN(dev, file_priv);
2344
2345         sarea_priv = master_priv->sarea_priv;
2346
2347         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2348                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2349                   elts->discard);
2350
2351         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2352                 DRM_ERROR("buffer index %d (of %d max)\n",
2353                           elts->idx, dma->buf_count - 1);
2354                 return -EINVAL;
2355         }
2356         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2357                 DRM_ERROR("buffer prim %d\n", elts->prim);
2358                 return -EINVAL;
2359         }
2360
2361         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2362         VB_AGE_TEST_WITH_RETURN(dev_priv);
2363
2364         buf = dma->buflist[elts->idx];
2365
2366         if (buf->file_priv != file_priv) {
2367                 DRM_ERROR("process %d using buffer owned by %p\n",
2368                           DRM_CURRENTPID, buf->file_priv);
2369                 return -EINVAL;
2370         }
2371         if (buf->pending) {
2372                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2373                 return -EINVAL;
2374         }
2375
2376         count = (elts->end - elts->start) / sizeof(u16);
2377         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2378
2379         if (elts->start & 0x7) {
2380                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2381                 return -EINVAL;
2382         }
2383         if (elts->start < buf->used) {
2384                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2385                 return -EINVAL;
2386         }
2387
2388         buf->used = elts->end;
2389
2390         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2391                 if (radeon_emit_state(dev_priv, file_priv,
2392                                       &sarea_priv->context_state,
2393                                       sarea_priv->tex_state,
2394                                       sarea_priv->dirty)) {
2395                         DRM_ERROR("radeon_emit_state failed\n");
2396                         return -EINVAL;
2397                 }
2398
2399                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2400                                        RADEON_UPLOAD_TEX1IMAGES |
2401                                        RADEON_UPLOAD_TEX2IMAGES |
2402                                        RADEON_REQUIRE_QUIESCENCE);
2403         }
2404
2405         /* Build up a prim_t record:
2406          */
2407         prim.start = elts->start;
2408         prim.finish = elts->end;
2409         prim.prim = elts->prim;
2410         prim.offset = 0;        /* offset from start of dma buffers */
2411         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2412         prim.vc_format = sarea_priv->vc_format;
2413
2414         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
2415         if (elts->discard) {
2416                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2417         }
2418
2419         COMMIT_RING();
2420         return 0;
2421 }
2422
2423 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2424 {
2425         drm_radeon_private_t *dev_priv = dev->dev_private;
2426         drm_radeon_texture_t *tex = data;
2427         drm_radeon_tex_image_t image;
2428         int ret;
2429
2430         LOCK_TEST_WITH_RETURN(dev, file_priv);
2431
2432         if (tex->image == NULL) {
2433                 DRM_ERROR("null texture image!\n");
2434                 return -EINVAL;
2435         }
2436
2437         if (DRM_COPY_FROM_USER(&image,
2438                                (drm_radeon_tex_image_t __user *) tex->image,
2439                                sizeof(image)))
2440                 return -EFAULT;
2441
2442         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2443         VB_AGE_TEST_WITH_RETURN(dev_priv);
2444
2445         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2446                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2447         else
2448                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2449
2450         return ret;
2451 }
2452
2453 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2454 {
2455         drm_radeon_private_t *dev_priv = dev->dev_private;
2456         drm_radeon_stipple_t *stipple = data;
2457         u32 mask[32];
2458
2459         LOCK_TEST_WITH_RETURN(dev, file_priv);
2460
2461         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2462                 return -EFAULT;
2463
2464         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2465
2466         radeon_cp_dispatch_stipple(dev, mask);
2467
2468         COMMIT_RING();
2469         return 0;
2470 }
2471
2472 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2473 {
2474         drm_radeon_private_t *dev_priv = dev->dev_private;
2475         struct drm_device_dma *dma = dev->dma;
2476         struct drm_buf *buf;
2477         drm_radeon_indirect_t *indirect = data;
2478         RING_LOCALS;
2479
2480         LOCK_TEST_WITH_RETURN(dev, file_priv);
2481
2482         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2483                   indirect->idx, indirect->start, indirect->end,
2484                   indirect->discard);
2485
2486         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2487                 DRM_ERROR("buffer index %d (of %d max)\n",
2488                           indirect->idx, dma->buf_count - 1);
2489                 return -EINVAL;
2490         }
2491
2492         buf = dma->buflist[indirect->idx];
2493
2494         if (buf->file_priv != file_priv) {
2495                 DRM_ERROR("process %d using buffer owned by %p\n",
2496                           DRM_CURRENTPID, buf->file_priv);
2497                 return -EINVAL;
2498         }
2499         if (buf->pending) {
2500                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2501                 return -EINVAL;
2502         }
2503
2504         if (indirect->start < buf->used) {
2505                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2506                           indirect->start, buf->used);
2507                 return -EINVAL;
2508         }
2509
2510         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2511         VB_AGE_TEST_WITH_RETURN(dev_priv);
2512
2513         buf->used = indirect->end;
2514
2515         /* Dispatch the indirect buffer full of commands from the
2516          * X server.  This is insecure and is thus only available to
2517          * privileged clients.
2518          */
2519         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2520                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2521         else {
2522                 /* Wait for the 3D stream to idle before the indirect buffer
2523                  * containing 2D acceleration commands is processed.
2524                  */
2525                 BEGIN_RING(2);
2526                 RADEON_WAIT_UNTIL_3D_IDLE();
2527                 ADVANCE_RING();
2528                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2529         }
2530
2531         if (indirect->discard) {
2532                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2533         }
2534
2535         COMMIT_RING();
2536         return 0;
2537 }
2538
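/* RADEON_VERTEX2 ioctl: newer vertex submission path.  Each primitive record
 * carries an index into a userspace state array; the state is (re-)emitted
 * whenever that index changes between consecutive primitives.
 */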
2539 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2540 {
2541         drm_radeon_private_t *dev_priv = dev->dev_private;
2542         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
2543         drm_radeon_sarea_t *sarea_priv;
2544         struct drm_device_dma *dma = dev->dma;
2545         struct drm_buf *buf;
2546         drm_radeon_vertex2_t *vertex = data;
2547         int i;
2548         unsigned char laststate;
2549
2550         LOCK_TEST_WITH_RETURN(dev, file_priv);
2551
2552         sarea_priv = master_priv->sarea_priv;
2553
2554         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2555                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2556
2557         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2558                 DRM_ERROR("buffer index %d (of %d max)\n",
2559                           vertex->idx, dma->buf_count - 1);
2560                 return -EINVAL;
2561         }
2562
2563         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2564         VB_AGE_TEST_WITH_RETURN(dev_priv);
2565
2566         buf = dma->buflist[vertex->idx];
2567
2568         if (buf->file_priv != file_priv) {
2569                 DRM_ERROR("process %d using buffer owned by %p\n",
2570                           DRM_CURRENTPID, buf->file_priv);
2571                 return -EINVAL;
2572         }
2573
2574         if (buf->pending) {
2575                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2576                 return -EINVAL;
2577         }
2578
2579         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2580                 return -EINVAL;
2581
2582         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2583                 drm_radeon_prim_t prim;
2584                 drm_radeon_tcl_prim_t tclprim;
2585
2586                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2587                         return -EFAULT;
2588
2589                 if (prim.stateidx != laststate) {
2590                         drm_radeon_state_t state;
2591
2592                         if (DRM_COPY_FROM_USER(&state,
2593                                                &vertex->state[prim.stateidx],
2594                                                sizeof(state)))
2595                                 return -EFAULT;
2596
2597                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2598                                 DRM_ERROR("radeon_emit_state2 failed\n");
2599                                 return -EINVAL;
2600                         }
2601
2602                         laststate = prim.stateidx;
2603                 }
2604
2605                 tclprim.start = prim.start;
2606                 tclprim.finish = prim.finish;
2607                 tclprim.prim = prim.prim;
2608                 tclprim.vc_format = prim.vc_format;
2609
2610                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2611                         tclprim.offset = prim.numverts * 64;
2612                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* indices may reference any vertex in the VB */
2613
2614                         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
2615                 } else {
2616                         tclprim.numverts = prim.numverts;
2617                         tclprim.offset = 0;     /* not used */
2618
2619                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2620                 }
2621
2622                 if (sarea_priv->nbox == 1)
2623                         sarea_priv->nbox = 0;
2624         }
2625
2626         if (vertex->discard) {
2627                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
2628         }
2629
2630         COMMIT_RING();
2631         return 0;
2632 }
2633
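/* Emit a type-0 (register write) packet described by a command header: look
 * up the register and dword count in the packet[] table, verify/fix up the
 * payload, then copy it onto the ring behind a CP_PACKET0 header.
 */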
2634 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2635                                struct drm_file *file_priv,
2636                                drm_radeon_cmd_header_t header,
2637                                drm_radeon_kcmd_buffer_t *cmdbuf)
2638 {
2639         int id = (int)header.packet.packet_id;
2640         int sz, reg;
2641         RING_LOCALS;
2642
2643         if (id >= RADEON_MAX_STATE_PACKETS)
2644                 return -EINVAL;
2645
2646         sz = packet[id].len;
2647         reg = packet[id].start;
2648
2649         if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
2650                 DRM_ERROR("Packet size provided larger than data provided\n");
2651                 return -EINVAL;
2652         }
2653
2654         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
2655                                 cmdbuf->buffer)) {
2656                 DRM_ERROR("Packet verification failed\n");
2657                 return -EINVAL;
2658         }
2659
2660         BEGIN_RING(sz + 1);
2661         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2662         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2663         ADVANCE_RING();
2664
2665         return 0;
2666 }
2667
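/* Upload a block of TCL scalar state: program the starting index and stride
 * through SE_TCL_SCALAR_INDX_REG, then stream the values out via
 * SE_TCL_SCALAR_DATA_REG.
 */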
2668 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2669                                           drm_radeon_cmd_header_t header,
2670                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2671 {
2672         int sz = header.scalars.count;
2673         int start = header.scalars.offset;
2674         int stride = header.scalars.stride;
2675         RING_LOCALS;
2676
2677         BEGIN_RING(3 + sz);
2678         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2679         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2680         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2681         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2682         ADVANCE_RING();
2683         return 0;
2684 }
2685
2686 /* Same as radeon_emit_scalars() except that the scalar index is biased by
2687  * 0x100, to reach indices the 8-bit offset field in the header can't encode. */
2688 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2689                                            drm_radeon_cmd_header_t header,
2690                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2691 {
2692         int sz = header.scalars.count;
2693         int start = ((unsigned int)header.scalars.offset) + 0x100;
2694         int stride = header.scalars.stride;
2695         RING_LOCALS;
2696
2697         BEGIN_RING(3 + sz);
2698         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2699         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2700         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2701         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2702         ADVANCE_RING();
2703         return 0;
2704 }
2705
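/* Upload a block of TCL vector state, analogous to the scalar path but via
 * the vector index/data registers, preceded by a TCL state flush.
 */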
2706 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2707                                           drm_radeon_cmd_header_t header,
2708                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2709 {
2710         int sz = header.vectors.count;
2711         int start = header.vectors.offset;
2712         int stride = header.vectors.stride;
2713         RING_LOCALS;
2714
2715         BEGIN_RING(5 + sz);
2716         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2717         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2718         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2719         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2720         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2721         ADVANCE_RING();
2722
2723         return 0;
2724 }
2725
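/* Upload TCL vector state linearly (octword stride of 1).  The start address
 * is split across addr_lo/addr_hi in the command header and the count is
 * given in vectors of four dwords each.
 */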
2726 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2727                                           drm_radeon_cmd_header_t header,
2728                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2729 {
2730         int sz = header.veclinear.count * 4;
2731         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2732         RING_LOCALS;
2733
2734         if (!sz)
2735                 return 0;
2736         if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
2737                 return -EINVAL;
2738
2739         BEGIN_RING(5 + sz);
2740         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2741         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2742         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2743         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2744         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2745         ADVANCE_RING();
2746
2747         return 0;
2748 }
2749
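/* Emit a single type-3 packet after it has been verified (and, where needed,
 * had its offsets fixed up) by radeon_check_and_fixup_packet3().
 */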
2750 static int radeon_emit_packet3(struct drm_device * dev,
2751                                struct drm_file *file_priv,
2752                                drm_radeon_kcmd_buffer_t *cmdbuf)
2753 {
2754         drm_radeon_private_t *dev_priv = dev->dev_private;
2755         unsigned int cmdsz;
2756         int ret;
2757         RING_LOCALS;
2758
2759         DRM_DEBUG("\n");
2760
2761         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2762                                                   cmdbuf, &cmdsz))) {
2763                 DRM_ERROR("Packet verification failed\n");
2764                 return ret;
2765         }
2766
2767         BEGIN_RING(cmdsz);
2768         OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2769         ADVANCE_RING();
2770
2771         return 0;
2772 }
2773
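/* Like radeon_emit_packet3(), but replay the packet once per cliprect,
 * emitting the clip rect for each box first.  With no cliprects the packet
 * is skipped entirely and the buffer iterator is simply advanced past it.
 */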
2774 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2775                                         struct drm_file *file_priv,
2776                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2777                                         int orig_nbox)
2778 {
2779         drm_radeon_private_t *dev_priv = dev->dev_private;
2780         struct drm_clip_rect box;
2781         unsigned int cmdsz;
2782         int ret;
2783         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2784         int i = 0;
2785         RING_LOCALS;
2786
2787         DRM_DEBUG("\n");
2788
2789         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2790                                                   cmdbuf, &cmdsz))) {
2791                 DRM_ERROR("Packet verification failed\n");
2792                 return ret;
2793         }
2794
2795         if (!orig_nbox)
2796                 goto out;
2797
2798         do {
2799                 if (i < cmdbuf->nbox) {
2800                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2801                                 return -EFAULT;
2802                         /* FIXME The second and subsequent times round
2803                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2804                          * calling emit_clip_rect(). This fixes a
2805                          * lockup on fast machines when sending
2806                          * several cliprects with a cmdbuf, as when
2807                          * waving a 2D window over a 3D
2808                          * window. Something in the commands from user
2809                          * space seems to hang the card when they're
2810                          * sent several times in a row. That would be
2811                          * the correct place to fix it but this works
2812                          * around it until I can figure that out - Tim
2813                          * Smith */
2814                         if (i) {
2815                                 BEGIN_RING(2);
2816                                 RADEON_WAIT_UNTIL_3D_IDLE();
2817                                 ADVANCE_RING();
2818                         }
2819                         radeon_emit_clip_rect(dev_priv, &box);
2820                 }
2821
2822                 BEGIN_RING(cmdsz);
2823                 OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2824                 ADVANCE_RING();
2825
2826         } while (++i < cmdbuf->nbox);
2827         if (cmdbuf->nbox == 1)
2828                 cmdbuf->nbox = 0;
2829
2830         return 0;
2831       out:
2832         drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
2833         return 0;
2834 }
2835
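/* Emit an engine idle wait on behalf of RADEON_CMD_WAIT: 2D, 3D or both,
 * depending on the flags.
 */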
2836 static int radeon_emit_wait(struct drm_device * dev, int flags)
2837 {
2838         drm_radeon_private_t *dev_priv = dev->dev_private;
2839         RING_LOCALS;
2840
2841         DRM_DEBUG("%x\n", flags);
2842         switch (flags) {
2843         case RADEON_WAIT_2D:
2844                 BEGIN_RING(2);
2845                 RADEON_WAIT_UNTIL_2D_IDLE();
2846                 ADVANCE_RING();
2847                 break;
2848         case RADEON_WAIT_3D:
2849                 BEGIN_RING(2);
2850                 RADEON_WAIT_UNTIL_3D_IDLE();
2851                 ADVANCE_RING();
2852                 break;
2853         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2854                 BEGIN_RING(2);
2855                 RADEON_WAIT_UNTIL_IDLE();
2856                 ADVANCE_RING();
2857                 break;
2858         default:
2859                 return -EINVAL;
2860         }
2861
2862         return 0;
2863 }
2864
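/* RADEON_CMDBUF ioctl: copy the client's command stream into a kernel buffer
 * (so userspace cannot race against the checks below), then either hand the
 * whole buffer to the r300 parser or walk it header by header, dispatching
 * each RADEON_CMD_* opcode through the emit helpers above.
 */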
2865 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
2866                 struct drm_file *file_priv)
2867 {
2868         drm_radeon_private_t *dev_priv = dev->dev_private;
2869         struct drm_device_dma *dma = dev->dma;
2870         struct drm_buf *buf = NULL;
2871         drm_radeon_cmd_header_t stack_header;
2872         int idx;
2873         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2874         int orig_nbox;
2875
2876         LOCK_TEST_WITH_RETURN(dev, file_priv);
2877
2878         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2879         VB_AGE_TEST_WITH_RETURN(dev_priv);
2880
2881         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2882                 return -EINVAL;
2883         }
2884
2885         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2886          * races between checking values and using those values in other code,
2887          * and simply to avoid a lot of function calls to copy in data.
2888          */
2889         if (cmdbuf->bufsz != 0) {
2890                 int rv;
2891                 void __user *buffer = cmdbuf->buffer;
2892                 rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
2893                 if (rv)
2894                         return rv;
2895                 rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
2896                                                 cmdbuf->bufsz);
2897                 if (rv) {
2898                         drm_buffer_free(cmdbuf->buffer);
2899                         return rv;
2900                 }
2901         } else
2902                 goto done;
2903
2904         orig_nbox = cmdbuf->nbox;
2905
2906         if (dev_priv->microcode_version == UCODE_R300) {
2907                 int temp;
2908                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2909
2910                 drm_buffer_free(cmdbuf->buffer);
2911
2912                 return temp;
2913         }
2914
2915         /* microcode_version != r300 */
2916         while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {
2917
2918                 drm_radeon_cmd_header_t *header;
2919                 header = drm_buffer_read_object(cmdbuf->buffer,
2920                                 sizeof(stack_header), &stack_header);
2921
2922                 switch (header->header.cmd_type) {
2923                 case RADEON_CMD_PACKET:
2924                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2925                         if (radeon_emit_packets
2926                             (dev_priv, file_priv, *header, cmdbuf)) {
2927                                 DRM_ERROR("radeon_emit_packets failed\n");
2928                                 goto err;
2929                         }
2930                         break;
2931
2932                 case RADEON_CMD_SCALARS:
2933                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2934                         if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
2935                                 DRM_ERROR("radeon_emit_scalars failed\n");
2936                                 goto err;
2937                         }
2938                         break;
2939
2940                 case RADEON_CMD_VECTORS:
2941                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2942                         if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
2943                                 DRM_ERROR("radeon_emit_vectors failed\n");
2944                                 goto err;
2945                         }
2946                         break;
2947
2948                 case RADEON_CMD_DMA_DISCARD:
2949                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2950                         idx = header->dma.buf_idx;
2951                         if (idx < 0 || idx >= dma->buf_count) {
2952                                 DRM_ERROR("buffer index %d (of %d max)\n",
2953                                           idx, dma->buf_count - 1);
2954                                 goto err;
2955                         }
2956
2957                         buf = dma->buflist[idx];
2958                         if (buf->file_priv != file_priv || buf->pending) {
2959                                 DRM_ERROR("bad buffer %p %p %d\n",
2960                                           buf->file_priv, file_priv,
2961                                           buf->pending);
2962                                 goto err;
2963                         }
2964
2965                         radeon_cp_discard_buffer(dev, file_priv->master, buf);
2966                         break;
2967
2968                 case RADEON_CMD_PACKET3:
2969                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2970                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2971                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2972                                 goto err;
2973                         }
2974                         break;
2975
2976                 case RADEON_CMD_PACKET3_CLIP:
2977                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2978                         if (radeon_emit_packet3_cliprect
2979                             (dev, file_priv, cmdbuf, orig_nbox)) {
2980                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2981                                 goto err;
2982                         }
2983                         break;
2984
2985                 case RADEON_CMD_SCALARS2:
2986                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2987                         if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
2988                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2989                                 goto err;
2990                         }
2991                         break;
2992
2993                 case RADEON_CMD_WAIT:
2994                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2995                         if (radeon_emit_wait(dev, header->wait.flags)) {
2996                                 DRM_ERROR("radeon_emit_wait failed\n");
2997                                 goto err;
2998                         }
2999                         break;
3000                 case RADEON_CMD_VECLINEAR:
3001                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3002                         if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
3003                                 DRM_ERROR("radeon_emit_veclinear failed\n");
3004                                 goto err;
3005                         }
3006                         break;
3007
3008                 default:
3009                         DRM_ERROR("bad cmd_type %d at byte %d\n",
3010                                   header->header.cmd_type,
3011                                   cmdbuf->buffer->iterator);
3012                         goto err;
3013                 }
3014         }
3015
3016         drm_buffer_free(cmdbuf->buffer);
3017
3018       done:
3019         DRM_DEBUG("DONE\n");
3020         COMMIT_RING();
3021         return 0;
3022
3023       err:
3024         drm_buffer_free(cmdbuf->buffer);
3025         return -EINVAL;
3026 }
3027
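/* RADEON_GETPARAM ioctl: report a single int-sized driver/hardware parameter
 * back to userspace.  A minimal userspace sketch, assuming libdrm's
 * drmCommandWriteRead(), the radeon_drm.h definitions and an open DRM fd
 * (illustrative only, not part of this driver):
 *
 *	drm_radeon_getparam_t gp;
 *	int pipes = 0;
 *
 *	memset(&gp, 0, sizeof(gp));
 *	gp.param = RADEON_PARAM_NUM_GB_PIPES;
 *	gp.value = &pipes;
 *	if (drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)) == 0)
 *		printf("GB pipes: %d\n", pipes);
 */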
3028 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3029 {
3030         drm_radeon_private_t *dev_priv = dev->dev_private;
3031         drm_radeon_getparam_t *param = data;
3032         int value;
3033
3034         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3035
3036         switch (param->param) {
3037         case RADEON_PARAM_GART_BUFFER_OFFSET:
3038                 value = dev_priv->gart_buffers_offset;
3039                 break;
3040         case RADEON_PARAM_LAST_FRAME:
3041                 dev_priv->stats.last_frame_reads++;
3042                 value = GET_SCRATCH(dev_priv, 0);
3043                 break;
3044         case RADEON_PARAM_LAST_DISPATCH:
3045                 value = GET_SCRATCH(dev_priv, 1);
3046                 break;
3047         case RADEON_PARAM_LAST_CLEAR:
3048                 dev_priv->stats.last_clear_reads++;
3049                 value = GET_SCRATCH(dev_priv, 2);
3050                 break;
3051         case RADEON_PARAM_IRQ_NR:
3052                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3053                         value = 0;
3054                 else
3055                         value = drm_dev_to_irq(dev);
3056                 break;
3057         case RADEON_PARAM_GART_BASE:
3058                 value = dev_priv->gart_vm_start;
3059                 break;
3060         case RADEON_PARAM_REGISTER_HANDLE:
3061                 value = dev_priv->mmio->offset;
3062                 break;
3063         case RADEON_PARAM_STATUS_HANDLE:
3064                 value = dev_priv->ring_rptr_offset;
3065                 break;
3066 #if BITS_PER_LONG == 32
3067                 /*
3068                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3069                  * pointer which can't fit into an int-sized variable.  According to
3070                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3071                  * not supporting it shouldn't be a problem.  If the same functionality
3072                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3073                  * so backwards-compatibility for the embedded platforms can be
3074                  * maintained.  --davidm 4-Feb-2004.
3075                  */
3076         case RADEON_PARAM_SAREA_HANDLE:
3077                 /* The lock is the first dword in the sarea. */
3078                 value = 0;      /* no users of this parameter */
3079                 break;
3080 #endif
3081         case RADEON_PARAM_GART_TEX_HANDLE:
3082                 value = dev_priv->gart_textures_offset;
3083                 break;
3084         case RADEON_PARAM_SCRATCH_OFFSET:
3085                 if (!dev_priv->writeback_works)
3086                         return -EINVAL;
3087                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3088                         value = R600_SCRATCH_REG_OFFSET;
3089                 else
3090                         value = RADEON_SCRATCH_REG_OFFSET;
3091                 break;
3092         case RADEON_PARAM_CARD_TYPE:
3093                 if (dev_priv->flags & RADEON_IS_PCIE)
3094                         value = RADEON_CARD_PCIE;
3095                 else if (dev_priv->flags & RADEON_IS_AGP)
3096                         value = RADEON_CARD_AGP;
3097                 else
3098                         value = RADEON_CARD_PCI;
3099                 break;
3100         case RADEON_PARAM_VBLANK_CRTC:
3101                 value = radeon_vblank_crtc_get(dev);
3102                 break;
3103         case RADEON_PARAM_FB_LOCATION:
3104                 value = radeon_read_fb_location(dev_priv);
3105                 break;
3106         case RADEON_PARAM_NUM_GB_PIPES:
3107                 value = dev_priv->num_gb_pipes;
3108                 break;
3109         case RADEON_PARAM_NUM_Z_PIPES:
3110                 value = dev_priv->num_z_pipes;
3111                 break;
3112         default:
3113                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3114                 return -EINVAL;
3115         }
3116
3117         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3118                 DRM_ERROR("copy_to_user\n");
3119                 return -EFAULT;
3120         }
3121
3122         return 0;
3123 }
3124
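/* RADEON_SETPARAM ioctl: let the client adjust driver state such as the
 * framebuffer location delta, color tiling, the PCI GART placement and
 * table size, the new memory map flag and the vblank CRTC selection.
 */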
3125 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3126 {
3127         drm_radeon_private_t *dev_priv = dev->dev_private;
3128         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
3129         drm_radeon_setparam_t *sp = data;
3130         struct drm_radeon_driver_file_fields *radeon_priv;
3131
3132         switch (sp->param) {
3133         case RADEON_SETPARAM_FB_LOCATION:
3134                 radeon_priv = file_priv->driver_priv;
3135                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3136                     sp->value;
3137                 break;
3138         case RADEON_SETPARAM_SWITCH_TILING:
3139                 if (sp->value == 0) {
3140                         DRM_DEBUG("color tiling disabled\n");
3141                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3142                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3143                         if (master_priv->sarea_priv)
3144                                 master_priv->sarea_priv->tiling_enabled = 0;
3145                 } else if (sp->value == 1) {
3146                         DRM_DEBUG("color tiling enabled\n");
3147                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3148                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3149                         if (master_priv->sarea_priv)
3150                                 master_priv->sarea_priv->tiling_enabled = 1;
3151                 }
3152                 break;
3153         case RADEON_SETPARAM_PCIGART_LOCATION:
3154                 dev_priv->pcigart_offset = sp->value;
3155                 dev_priv->pcigart_offset_set = 1;
3156                 break;
3157         case RADEON_SETPARAM_NEW_MEMMAP:
3158                 dev_priv->new_memmap = sp->value;
3159                 break;
3160         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3161                 dev_priv->gart_info.table_size = sp->value;
3162                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3163                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3164                 break;
3165         case RADEON_SETPARAM_VBLANK_CRTC:
3166                 return radeon_vblank_crtc_set(dev, sp->value);
3168         default:
3169                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3170                 return -EINVAL;
3171         }
3172
3173         return 0;
3174 }
3175
3176 /* When a client dies:
3177  *    - Check for and clean up flipped page state.
3178  *    - Free any allocated GART memory.
3179  *    - Free any allocated radeon surfaces.
3180  *
3181  * DRM infrastructure takes care of reclaiming dma buffers.
3182  */
3183 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3184 {
3185         if (dev->dev_private) {
3186                 drm_radeon_private_t *dev_priv = dev->dev_private;
3187                 dev_priv->page_flipping = 0;
3188                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3189                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3190                 radeon_surfaces_release(file_priv, dev_priv);
3191         }
3192 }
3193
3194 void radeon_driver_lastclose(struct drm_device *dev)
3195 {
3196         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3197         radeon_do_release(dev);
3198 }
3199
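/* Per-open file setup: allocate the per-file driver_priv and seed
 * radeon_fb_delta with the current framebuffer location, or 0 when dev_priv
 * is not yet set up.
 */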
3200 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3201 {
3202         drm_radeon_private_t *dev_priv = dev->dev_private;
3203         struct drm_radeon_driver_file_fields *radeon_priv;
3204
3205         DRM_DEBUG("\n");
3206         radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);
3207
3208         if (!radeon_priv)
3209                 return -ENOMEM;
3210
3211         file_priv->driver_priv = radeon_priv;
3212
3213         if (dev_priv)
3214                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3215         else
3216                 radeon_priv->radeon_fb_delta = 0;
3217         return 0;
3218 }
3219
3220 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3221 {
3222         struct drm_radeon_driver_file_fields *radeon_priv =
3223             file_priv->driver_priv;
3224
3225         kfree(radeon_priv);
3226 }
3227
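/* Legacy (UMS) ioctl table.  CP/engine management, heap init and indirect
 * buffer submission carry DRM_MASTER|DRM_ROOT_ONLY in addition to DRM_AUTH;
 * everything else requires DRM_AUTH only.
 */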
3228 struct drm_ioctl_desc radeon_ioctls[] = {
3229         DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3230         DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3231         DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3232         DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3233         DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3234         DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3235         DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3236         DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3237         DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3238         DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3239         DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3240         DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3241         DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3242         DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3243         DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3244         DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3245         DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3246         DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3247         DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3248         DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3249         DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
3250         DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3251         DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3252         DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3253         DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3254         DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3255         DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3256         DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
3257 };
3258
3259 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);