drm/i915: Export our request as a dma-buf fence on the reservation object
drivers/gpu/drm/i915/i915_gem_execbuffer.c
1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include <linux/dma_remapping.h>
30 #include <linux/reservation.h>
31 #include <linux/uaccess.h>
32
33 #include <drm/drmP.h>
34 #include <drm/i915_drm.h>
35
36 #include "i915_drv.h"
37 #include "i915_gem_dmabuf.h"
38 #include "i915_trace.h"
39 #include "intel_drv.h"
40 #include "intel_frontbuffer.h"
41
42 #define  __EXEC_OBJECT_HAS_PIN          (1<<31)
43 #define  __EXEC_OBJECT_HAS_FENCE        (1<<30)
44 #define  __EXEC_OBJECT_NEEDS_MAP        (1<<29)
45 #define  __EXEC_OBJECT_NEEDS_BIAS       (1<<28)
46 #define  __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */
47
48 #define BATCH_OFFSET_BIAS (256*1024)
49
50 struct i915_execbuffer_params {
51         struct drm_device               *dev;
52         struct drm_file                 *file;
53         struct i915_vma                 *batch;
54         u32                             dispatch_flags;
55         u32                             args_batch_start_offset;
56         struct intel_engine_cs          *engine;
57         struct i915_gem_context         *ctx;
58         struct drm_i915_gem_request     *request;
59 };
60
61 struct eb_vmas {
62         struct list_head vmas;
63         int and;
64         union {
65                 struct i915_vma *lut[0];
66                 struct hlist_head buckets[0];
67         };
68 };
69
70 static struct eb_vmas *
71 eb_create(struct drm_i915_gem_execbuffer2 *args)
72 {
73         struct eb_vmas *eb = NULL;
74
75         if (args->flags & I915_EXEC_HANDLE_LUT) {
76                 unsigned size = args->buffer_count;
77                 size *= sizeof(struct i915_vma *);
78                 size += sizeof(struct eb_vmas);
79                 eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
80         }
81
82         if (eb == NULL) {
83                 unsigned size = args->buffer_count;
84                 unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
85                 BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
86                 while (count > 2*size)
87                         count >>= 1;
88                 eb = kzalloc(count*sizeof(struct hlist_head) +
89                              sizeof(struct eb_vmas),
90                              GFP_TEMPORARY);
91                 if (eb == NULL)
92                         return eb;
93
94                 eb->and = count - 1;
95         } else
96                 eb->and = -args->buffer_count;
97
98         INIT_LIST_HEAD(&eb->vmas);
99         return eb;
100 }
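
/*
 * eb_create() picks one of two handle-lookup strategies: with
 * I915_EXEC_HANDLE_LUT, relocations refer to buffers by their index in
 * the exec list, so a flat array (eb->lut) suffices and eb->and stores
 * the negated buffer count as a marker; otherwise (or if that allocation
 * fails) a power-of-two hash table of hlist heads is used and eb->and
 * holds the hash mask consumed by eb_get_vma().
 */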
101
102 static void
103 eb_reset(struct eb_vmas *eb)
104 {
105         if (eb->and >= 0)
106                 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
107 }
108
109 static struct i915_vma *
110 eb_get_batch(struct eb_vmas *eb)
111 {
112         struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
113
114         /*
115          * SNA is doing fancy tricks with compressing batch buffers, which leads
116          * to negative relocation deltas. Usually that works out ok since the
117          * relocate address is still positive, except when the batch is placed
118          * very low in the GTT. Ensure this doesn't happen.
119          *
120          * Note that actual hangs have only been observed on gen7, but for
121          * paranoia do it everywhere.
122          */
123         if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
124                 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
125
126         return vma;
127 }
128
129 static int
130 eb_lookup_vmas(struct eb_vmas *eb,
131                struct drm_i915_gem_exec_object2 *exec,
132                const struct drm_i915_gem_execbuffer2 *args,
133                struct i915_address_space *vm,
134                struct drm_file *file)
135 {
136         struct drm_i915_gem_object *obj;
137         struct list_head objects;
138         int i, ret;
139
140         INIT_LIST_HEAD(&objects);
141         spin_lock(&file->table_lock);
142         /* Grab a reference to the object and release the lock so we can
143          * look up or create the VMA without using GFP_ATOMIC. */
144         for (i = 0; i < args->buffer_count; i++) {
145                 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
146                 if (obj == NULL) {
147                         spin_unlock(&file->table_lock);
148                         DRM_DEBUG("Invalid object handle %d at index %d\n",
149                                    exec[i].handle, i);
150                         ret = -ENOENT;
151                         goto err;
152                 }
153
154                 if (!list_empty(&obj->obj_exec_link)) {
155                         spin_unlock(&file->table_lock);
156                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
157                                    obj, exec[i].handle, i);
158                         ret = -EINVAL;
159                         goto err;
160                 }
161
162                 i915_gem_object_get(obj);
163                 list_add_tail(&obj->obj_exec_link, &objects);
164         }
165         spin_unlock(&file->table_lock);
166
167         i = 0;
168         while (!list_empty(&objects)) {
169                 struct i915_vma *vma;
170
171                 obj = list_first_entry(&objects,
172                                        struct drm_i915_gem_object,
173                                        obj_exec_link);
174
175                 /*
176                  * NOTE: We can leak any vmas created here when something fails
177                  * later on. But that's no issue since vma_unbind can deal with
178                  * vmas which are not actually bound. And since only
179                  * lookup_or_create exists as an interface to get at the vma
180                  * from the (obj, vm) pair, we don't run the risk of creating
181                  * duplicated vmas for the same vm.
182                  */
183                 vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
184                 if (IS_ERR(vma)) {
185                         DRM_DEBUG("Failed to lookup VMA\n");
186                         ret = PTR_ERR(vma);
187                         goto err;
188                 }
189
190                 /* Transfer ownership from the objects list to the vmas list. */
191                 list_add_tail(&vma->exec_list, &eb->vmas);
192                 list_del_init(&obj->obj_exec_link);
193
194                 vma->exec_entry = &exec[i];
195                 if (eb->and < 0) {
196                         eb->lut[i] = vma;
197                 } else {
198                         uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
199                         vma->exec_handle = handle;
200                         hlist_add_head(&vma->exec_node,
201                                        &eb->buckets[handle & eb->and]);
202                 }
203                 ++i;
204         }
205
206         return 0;
207
208
209 err:
210         while (!list_empty(&objects)) {
211                 obj = list_first_entry(&objects,
212                                        struct drm_i915_gem_object,
213                                        obj_exec_link);
214                 list_del_init(&obj->obj_exec_link);
215                 i915_gem_object_put(obj);
216         }
217         /*
218          * Objects already transferred to the vmas list will be unreferenced by
219          * eb_destroy.
220          */
221
222         return ret;
223 }
224
225 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
226 {
227         if (eb->and < 0) {
228                 if (handle >= -eb->and)
229                         return NULL;
230                 return eb->lut[handle];
231         } else {
232                 struct hlist_head *head;
233                 struct i915_vma *vma;
234
235                 head = &eb->buckets[handle & eb->and];
236                 hlist_for_each_entry(vma, head, exec_node) {
237                         if (vma->exec_handle == handle)
238                                 return vma;
239                 }
240                 return NULL;
241         }
242 }
243
244 static void
245 i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
246 {
247         struct drm_i915_gem_exec_object2 *entry;
248         struct drm_i915_gem_object *obj = vma->obj;
249
250         if (!drm_mm_node_allocated(&vma->node))
251                 return;
252
253         entry = vma->exec_entry;
254
255         if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
256                 i915_gem_object_unpin_fence(obj);
257
258         if (entry->flags & __EXEC_OBJECT_HAS_PIN)
259                 __i915_vma_unpin(vma);
260
261         entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
262 }
263
264 static void eb_destroy(struct eb_vmas *eb)
265 {
266         while (!list_empty(&eb->vmas)) {
267                 struct i915_vma *vma;
268
269                 vma = list_first_entry(&eb->vmas,
270                                        struct i915_vma,
271                                        exec_list);
272                 list_del_init(&vma->exec_list);
273                 i915_gem_execbuffer_unreserve_vma(vma);
274                 i915_gem_object_put(vma->obj);
275         }
276         kfree(eb);
277 }
278
279 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
280 {
281         return (HAS_LLC(obj->base.dev) ||
282                 obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
283                 obj->cache_level != I915_CACHE_NONE);
284 }
285
286 /* Used to convert any address to canonical form.
287  * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
288  * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
289  * addresses to be in a canonical form:
290  * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
291  * canonical form [63:48] == [47]."
292  */
293 #define GEN8_HIGH_ADDRESS_BIT 47
294 static inline uint64_t gen8_canonical_addr(uint64_t address)
295 {
296         return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
297 }
298
299 static inline uint64_t gen8_noncanonical_addr(uint64_t address)
300 {
301         return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
302 }
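
/*
 * Example: with GEN8_HIGH_ADDRESS_BIT == 47, an address with bit 47 set,
 * e.g. 0x0000800000000000, becomes 0xffff800000000000 in canonical form,
 * and gen8_noncanonical_addr() masks it back down to the low 48 bits.
 */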
303
304 static inline uint64_t
305 relocation_target(struct drm_i915_gem_relocation_entry *reloc,
306                   uint64_t target_offset)
307 {
308         return gen8_canonical_addr((int)reloc->delta + target_offset);
309 }
310
311 static int
312 relocate_entry_cpu(struct drm_i915_gem_object *obj,
313                    struct drm_i915_gem_relocation_entry *reloc,
314                    uint64_t target_offset)
315 {
316         struct drm_device *dev = obj->base.dev;
317         uint32_t page_offset = offset_in_page(reloc->offset);
318         uint64_t delta = relocation_target(reloc, target_offset);
319         char *vaddr;
320         int ret;
321
322         ret = i915_gem_object_set_to_cpu_domain(obj, true);
323         if (ret)
324                 return ret;
325
326         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
327                                 reloc->offset >> PAGE_SHIFT));
328         *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
329
330         if (INTEL_INFO(dev)->gen >= 8) {
331                 page_offset = offset_in_page(page_offset + sizeof(uint32_t));
332
333                 if (page_offset == 0) {
334                         kunmap_atomic(vaddr);
335                         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
336                             (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
337                 }
338
339                 *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
340         }
341
342         kunmap_atomic(vaddr);
343
344         return 0;
345 }
346
347 static int
348 relocate_entry_gtt(struct drm_i915_gem_object *obj,
349                    struct drm_i915_gem_relocation_entry *reloc,
350                    uint64_t target_offset)
351 {
352         struct drm_device *dev = obj->base.dev;
353         struct drm_i915_private *dev_priv = to_i915(dev);
354         struct i915_ggtt *ggtt = &dev_priv->ggtt;
355         uint64_t delta = relocation_target(reloc, target_offset);
356         uint64_t offset;
357         void __iomem *reloc_page;
358         int ret;
359
360         ret = i915_gem_object_set_to_gtt_domain(obj, true);
361         if (ret)
362                 return ret;
363
364         ret = i915_gem_object_put_fence(obj);
365         if (ret)
366                 return ret;
367
368         /* Map the page containing the relocation we're going to perform.  */
369         offset = i915_gem_obj_ggtt_offset(obj);
370         offset += reloc->offset;
371         reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
372                                               offset & PAGE_MASK);
373         iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
374
375         if (INTEL_INFO(dev)->gen >= 8) {
376                 offset += sizeof(uint32_t);
377
378                 if (offset_in_page(offset) == 0) {
379                         io_mapping_unmap_atomic(reloc_page);
380                         reloc_page =
381                                 io_mapping_map_atomic_wc(ggtt->mappable,
382                                                          offset);
383                 }
384
385                 iowrite32(upper_32_bits(delta),
386                           reloc_page + offset_in_page(offset));
387         }
388
389         io_mapping_unmap_atomic(reloc_page);
390
391         return 0;
392 }
393
394 static void
395 clflush_write32(void *addr, uint32_t value)
396 {
397         /* This is not a fast path, so KISS. */
398         drm_clflush_virt_range(addr, sizeof(uint32_t));
399         *(uint32_t *)addr = value;
400         drm_clflush_virt_range(addr, sizeof(uint32_t));
401 }
402
403 static int
404 relocate_entry_clflush(struct drm_i915_gem_object *obj,
405                        struct drm_i915_gem_relocation_entry *reloc,
406                        uint64_t target_offset)
407 {
408         struct drm_device *dev = obj->base.dev;
409         uint32_t page_offset = offset_in_page(reloc->offset);
410         uint64_t delta = relocation_target(reloc, target_offset);
411         char *vaddr;
412         int ret;
413
414         ret = i915_gem_object_set_to_gtt_domain(obj, true);
415         if (ret)
416                 return ret;
417
418         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
419                                 reloc->offset >> PAGE_SHIFT));
420         clflush_write32(vaddr + page_offset, lower_32_bits(delta));
421
422         if (INTEL_INFO(dev)->gen >= 8) {
423                 page_offset = offset_in_page(page_offset + sizeof(uint32_t));
424
425                 if (page_offset == 0) {
426                         kunmap_atomic(vaddr);
427                         vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
428                             (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
429                 }
430
431                 clflush_write32(vaddr + page_offset, upper_32_bits(delta));
432         }
433
434         kunmap_atomic(vaddr);
435
436         return 0;
437 }
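
/*
 * The three relocate_entry_*() helpers above are the three ways a
 * relocation value can be written: through a CPU kmap when
 * use_cpu_reloc() says the object is coherently accessible by the CPU,
 * through an atomic WC mapping of the GGTT aperture when the object is
 * map_and_fenceable, and with explicit clflushes otherwise. The choice
 * is made in i915_gem_execbuffer_relocate_entry().
 */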
438
439 static bool object_is_idle(struct drm_i915_gem_object *obj)
440 {
441         unsigned long active = i915_gem_object_get_active(obj);
442         int idx;
443
444         for_each_active(active, idx) {
445                 if (!i915_gem_active_is_idle(&obj->last_read[idx],
446                                              &obj->base.dev->struct_mutex))
447                         return false;
448         }
449
450         return true;
451 }
452
453 static int
454 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
455                                    struct eb_vmas *eb,
456                                    struct drm_i915_gem_relocation_entry *reloc)
457 {
458         struct drm_device *dev = obj->base.dev;
459         struct drm_gem_object *target_obj;
460         struct drm_i915_gem_object *target_i915_obj;
461         struct i915_vma *target_vma;
462         uint64_t target_offset;
463         int ret;
464
465         /* we already hold a reference to all valid objects */
466         target_vma = eb_get_vma(eb, reloc->target_handle);
467         if (unlikely(target_vma == NULL))
468                 return -ENOENT;
469         target_i915_obj = target_vma->obj;
470         target_obj = &target_vma->obj->base;
471
472         target_offset = gen8_canonical_addr(target_vma->node.start);
473
474         /* Sandybridge PPGTT errata: we need a global GTT mapping for MI and
475          * pipe_control writes because the GPU doesn't properly redirect them
476          * through the PPGTT for non-secure batchbuffers. */
477         if (unlikely(IS_GEN6(dev) &&
478             reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
479                 ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
480                                     PIN_GLOBAL);
481                 if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
482                         return ret;
483         }
484
485         /* Validate that the target is in a valid r/w GPU domain */
486         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
487                 DRM_DEBUG("reloc with multiple write domains: "
488                           "obj %p target %d offset %d "
489                           "read %08x write %08x",
490                           obj, reloc->target_handle,
491                           (int) reloc->offset,
492                           reloc->read_domains,
493                           reloc->write_domain);
494                 return -EINVAL;
495         }
496         if (unlikely((reloc->write_domain | reloc->read_domains)
497                      & ~I915_GEM_GPU_DOMAINS)) {
498                 DRM_DEBUG("reloc with read/write non-GPU domains: "
499                           "obj %p target %d offset %d "
500                           "read %08x write %08x",
501                           obj, reloc->target_handle,
502                           (int) reloc->offset,
503                           reloc->read_domains,
504                           reloc->write_domain);
505                 return -EINVAL;
506         }
507
508         target_obj->pending_read_domains |= reloc->read_domains;
509         target_obj->pending_write_domain |= reloc->write_domain;
510
511         /* If the relocation already has the right value in it, no
512          * more work needs to be done.
513          */
514         if (target_offset == reloc->presumed_offset)
515                 return 0;
516
517         /* Check that the relocation address is valid... */
518         if (unlikely(reloc->offset >
519                 obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
520                 DRM_DEBUG("Relocation beyond object bounds: "
521                           "obj %p target %d offset %d size %d.\n",
522                           obj, reloc->target_handle,
523                           (int) reloc->offset,
524                           (int) obj->base.size);
525                 return -EINVAL;
526         }
527         if (unlikely(reloc->offset & 3)) {
528                 DRM_DEBUG("Relocation not 4-byte aligned: "
529                           "obj %p target %d offset %d.\n",
530                           obj, reloc->target_handle,
531                           (int) reloc->offset);
532                 return -EINVAL;
533         }
534
535         /* We can't wait for rendering with pagefaults disabled */
536         if (pagefault_disabled() && !object_is_idle(obj))
537                 return -EFAULT;
538
539         if (use_cpu_reloc(obj))
540                 ret = relocate_entry_cpu(obj, reloc, target_offset);
541         else if (obj->map_and_fenceable)
542                 ret = relocate_entry_gtt(obj, reloc, target_offset);
543         else if (static_cpu_has(X86_FEATURE_CLFLUSH))
544                 ret = relocate_entry_clflush(obj, reloc, target_offset);
545         else {
546                 WARN_ONCE(1, "Impossible case in relocation handling\n");
547                 ret = -ENODEV;
548         }
549
550         if (ret)
551                 return ret;
552
553         /* and update the user's relocation entry */
554         reloc->presumed_offset = target_offset;
555
556         return 0;
557 }
558
559 static int
560 i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
561                                  struct eb_vmas *eb)
562 {
563 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
564         struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
565         struct drm_i915_gem_relocation_entry __user *user_relocs;
566         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
567         int remain, ret;
568
569         user_relocs = u64_to_user_ptr(entry->relocs_ptr);
570
571         remain = entry->relocation_count;
572         while (remain) {
573                 struct drm_i915_gem_relocation_entry *r = stack_reloc;
574                 int count = remain;
575                 if (count > ARRAY_SIZE(stack_reloc))
576                         count = ARRAY_SIZE(stack_reloc);
577                 remain -= count;
578
579                 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
580                         return -EFAULT;
581
582                 do {
583                         u64 offset = r->presumed_offset;
584
585                         ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
586                         if (ret)
587                                 return ret;
588
589                         if (r->presumed_offset != offset &&
590                             __put_user(r->presumed_offset, &user_relocs->presumed_offset)) {
591                                 return -EFAULT;
592                         }
593
594                         user_relocs++;
595                         r++;
596                 } while (--count);
597         }
598
599         return 0;
600 #undef N_RELOC
601 }
602
603 static int
604 i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
605                                       struct eb_vmas *eb,
606                                       struct drm_i915_gem_relocation_entry *relocs)
607 {
608         const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
609         int i, ret;
610
611         for (i = 0; i < entry->relocation_count; i++) {
612                 ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
613                 if (ret)
614                         return ret;
615         }
616
617         return 0;
618 }
619
620 static int
621 i915_gem_execbuffer_relocate(struct eb_vmas *eb)
622 {
623         struct i915_vma *vma;
624         int ret = 0;
625
626         /* This is the fast path and we cannot handle a pagefault whilst
627          * holding the struct_mutex lest the user pass in the relocations
628          * contained within an mmapped bo. In such a case the page fault
629          * handler would call i915_gem_fault() and we would try to acquire
630          * the struct_mutex again. Obviously this is bad and so lockdep
631          * complains vehemently.
632          */
633         pagefault_disable();
634         list_for_each_entry(vma, &eb->vmas, exec_list) {
635                 ret = i915_gem_execbuffer_relocate_vma(vma, eb);
636                 if (ret)
637                         break;
638         }
639         pagefault_enable();
640
641         return ret;
642 }
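
/*
 * Note that this fast path runs with pagefaults disabled; if copying the
 * relocations faults, -EFAULT is returned and the caller is expected to
 * retry via i915_gem_execbuffer_relocate_slow(), which copies everything
 * with the struct_mutex dropped.
 */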
643
644 static bool only_mappable_for_reloc(unsigned int flags)
645 {
646         return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
647                 __EXEC_OBJECT_NEEDS_MAP;
648 }
649
650 static int
651 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
652                                 struct intel_engine_cs *engine,
653                                 bool *need_reloc)
654 {
655         struct drm_i915_gem_object *obj = vma->obj;
656         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
657         uint64_t flags;
658         int ret;
659
660         flags = PIN_USER;
661         if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
662                 flags |= PIN_GLOBAL;
663
664         if (!drm_mm_node_allocated(&vma->node)) {
665                 /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
666                  * limit address to the first 4GBs for unflagged objects.
667                  */
668                 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
669                         flags |= PIN_ZONE_4G;
670                 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
671                         flags |= PIN_GLOBAL | PIN_MAPPABLE;
672                 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
673                         flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
674                 if (entry->flags & EXEC_OBJECT_PINNED)
675                         flags |= entry->offset | PIN_OFFSET_FIXED;
676                 if ((flags & PIN_MAPPABLE) == 0)
677                         flags |= PIN_HIGH;
678         }
679
680         ret = i915_vma_pin(vma,
681                            entry->pad_to_size,
682                            entry->alignment,
683                            flags);
684         if ((ret == -ENOSPC || ret == -E2BIG) &&
685             only_mappable_for_reloc(entry->flags))
686                 ret = i915_vma_pin(vma,
687                                    entry->pad_to_size,
688                                    entry->alignment,
689                                    flags & ~PIN_MAPPABLE);
690         if (ret)
691                 return ret;
692
693         entry->flags |= __EXEC_OBJECT_HAS_PIN;
694
695         if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
696                 ret = i915_gem_object_get_fence(obj);
697                 if (ret)
698                         return ret;
699
700                 if (i915_gem_object_pin_fence(obj))
701                         entry->flags |= __EXEC_OBJECT_HAS_FENCE;
702         }
703
704         if (entry->offset != vma->node.start) {
705                 entry->offset = vma->node.start;
706                 *need_reloc = true;
707         }
708
709         if (entry->flags & EXEC_OBJECT_WRITE) {
710                 obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
711                 obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
712         }
713
714         return 0;
715 }
716
717 static bool
718 need_reloc_mappable(struct i915_vma *vma)
719 {
720         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
721
722         if (entry->relocation_count == 0)
723                 return false;
724
725         if (!i915_vma_is_ggtt(vma))
726                 return false;
727
728         /* See also use_cpu_reloc() */
729         if (HAS_LLC(vma->obj->base.dev))
730                 return false;
731
732         if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
733                 return false;
734
735         return true;
736 }
737
738 static bool
739 eb_vma_misplaced(struct i915_vma *vma)
740 {
741         struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
742         struct drm_i915_gem_object *obj = vma->obj;
743
744         WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
745                 !i915_vma_is_ggtt(vma));
746
747         if (entry->alignment &&
748             vma->node.start & (entry->alignment - 1))
749                 return true;
750
751         if (vma->node.size < entry->pad_to_size)
752                 return true;
753
754         if (entry->flags & EXEC_OBJECT_PINNED &&
755             vma->node.start != entry->offset)
756                 return true;
757
758         if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
759             vma->node.start < BATCH_OFFSET_BIAS)
760                 return true;
761
762         /* avoid costly ping-pong once a batch bo ended up non-mappable */
763         if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
764                 return !only_mappable_for_reloc(entry->flags);
765
766         if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
767             (vma->node.start + vma->node.size - 1) >> 32)
768                 return true;
769
770         return false;
771 }
772
773 static int
774 i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
775                             struct list_head *vmas,
776                             struct i915_gem_context *ctx,
777                             bool *need_relocs)
778 {
779         struct drm_i915_gem_object *obj;
780         struct i915_vma *vma;
781         struct i915_address_space *vm;
782         struct list_head ordered_vmas;
783         struct list_head pinned_vmas;
784         bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
785         int retry;
786
787         vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
788
789         INIT_LIST_HEAD(&ordered_vmas);
790         INIT_LIST_HEAD(&pinned_vmas);
791         while (!list_empty(vmas)) {
792                 struct drm_i915_gem_exec_object2 *entry;
793                 bool need_fence, need_mappable;
794
795                 vma = list_first_entry(vmas, struct i915_vma, exec_list);
796                 obj = vma->obj;
797                 entry = vma->exec_entry;
798
799                 if (ctx->flags & CONTEXT_NO_ZEROMAP)
800                         entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
801
802                 if (!has_fenced_gpu_access)
803                         entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
804                 need_fence =
805                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
806                         obj->tiling_mode != I915_TILING_NONE;
807                 need_mappable = need_fence || need_reloc_mappable(vma);
808
809                 if (entry->flags & EXEC_OBJECT_PINNED)
810                         list_move_tail(&vma->exec_list, &pinned_vmas);
811                 else if (need_mappable) {
812                         entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
813                         list_move(&vma->exec_list, &ordered_vmas);
814                 } else
815                         list_move_tail(&vma->exec_list, &ordered_vmas);
816
817                 obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
818                 obj->base.pending_write_domain = 0;
819         }
820         list_splice(&ordered_vmas, vmas);
821         list_splice(&pinned_vmas, vmas);
822
823         /* Attempt to pin all of the buffers into the GTT.
824          * This is done in 3 phases:
825          *
826          * 1a. Unbind all objects that do not match the GTT constraints for
827          *     the execbuffer (fenceable, mappable, alignment etc).
828          * 1b. Increment pin count for already bound objects.
829          * 2.  Bind new objects.
830          * 3.  Decrement pin count.
831          *
832          * This avoids unnecessary unbinding of later objects in order to make
833          * room for the earlier objects *unless* we need to defragment.
834          */
835         retry = 0;
836         do {
837                 int ret = 0;
838
839                 /* Unbind any ill-fitting objects or pin. */
840                 list_for_each_entry(vma, vmas, exec_list) {
841                         if (!drm_mm_node_allocated(&vma->node))
842                                 continue;
843
844                         if (eb_vma_misplaced(vma))
845                                 ret = i915_vma_unbind(vma);
846                         else
847                                 ret = i915_gem_execbuffer_reserve_vma(vma,
848                                                                       engine,
849                                                                       need_relocs);
850                         if (ret)
851                                 goto err;
852                 }
853
854                 /* Bind fresh objects */
855                 list_for_each_entry(vma, vmas, exec_list) {
856                         if (drm_mm_node_allocated(&vma->node))
857                                 continue;
858
859                         ret = i915_gem_execbuffer_reserve_vma(vma, engine,
860                                                               need_relocs);
861                         if (ret)
862                                 goto err;
863                 }
864
865 err:
866                 if (ret != -ENOSPC || retry++)
867                         return ret;
868
869                 /* Decrement pin count for bound objects */
870                 list_for_each_entry(vma, vmas, exec_list)
871                         i915_gem_execbuffer_unreserve_vma(vma);
872
873                 ret = i915_gem_evict_vm(vm, true);
874                 if (ret)
875                         return ret;
876         } while (1);
877 }
878
879 static int
880 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
881                                   struct drm_i915_gem_execbuffer2 *args,
882                                   struct drm_file *file,
883                                   struct intel_engine_cs *engine,
884                                   struct eb_vmas *eb,
885                                   struct drm_i915_gem_exec_object2 *exec,
886                                   struct i915_gem_context *ctx)
887 {
888         struct drm_i915_gem_relocation_entry *reloc;
889         struct i915_address_space *vm;
890         struct i915_vma *vma;
891         bool need_relocs;
892         int *reloc_offset;
893         int i, total, ret;
894         unsigned count = args->buffer_count;
895
896         vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
897
898         /* We may process another execbuffer during the unlock... */
899         while (!list_empty(&eb->vmas)) {
900                 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
901                 list_del_init(&vma->exec_list);
902                 i915_gem_execbuffer_unreserve_vma(vma);
903                 i915_gem_object_put(vma->obj);
904         }
905
906         mutex_unlock(&dev->struct_mutex);
907
908         total = 0;
909         for (i = 0; i < count; i++)
910                 total += exec[i].relocation_count;
911
912         reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
913         reloc = drm_malloc_ab(total, sizeof(*reloc));
914         if (reloc == NULL || reloc_offset == NULL) {
915                 drm_free_large(reloc);
916                 drm_free_large(reloc_offset);
917                 mutex_lock(&dev->struct_mutex);
918                 return -ENOMEM;
919         }
920
921         total = 0;
922         for (i = 0; i < count; i++) {
923                 struct drm_i915_gem_relocation_entry __user *user_relocs;
924                 u64 invalid_offset = (u64)-1;
925                 int j;
926
927                 user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
928
929                 if (copy_from_user(reloc+total, user_relocs,
930                                    exec[i].relocation_count * sizeof(*reloc))) {
931                         ret = -EFAULT;
932                         mutex_lock(&dev->struct_mutex);
933                         goto err;
934                 }
935
936                 /* As we do not update the known relocation offsets after
937                  * relocating (due to the complexities in lock handling),
938                  * we need to mark them as invalid now so that we force the
939                  * relocation processing next time. Just in case the target
940                  * object is evicted and then rebound into its old
941                  * presumed_offset before the next execbuffer - if that
942                  * happened we would make the mistake of assuming that the
943                  * relocations were valid.
944                  */
945                 for (j = 0; j < exec[i].relocation_count; j++) {
946                         if (__copy_to_user(&user_relocs[j].presumed_offset,
947                                            &invalid_offset,
948                                            sizeof(invalid_offset))) {
949                                 ret = -EFAULT;
950                                 mutex_lock(&dev->struct_mutex);
951                                 goto err;
952                         }
953                 }
954
955                 reloc_offset[i] = total;
956                 total += exec[i].relocation_count;
957         }
958
959         ret = i915_mutex_lock_interruptible(dev);
960         if (ret) {
961                 mutex_lock(&dev->struct_mutex);
962                 goto err;
963         }
964
965         /* reacquire the objects */
966         eb_reset(eb);
967         ret = eb_lookup_vmas(eb, exec, args, vm, file);
968         if (ret)
969                 goto err;
970
971         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
972         ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
973                                           &need_relocs);
974         if (ret)
975                 goto err;
976
977         list_for_each_entry(vma, &eb->vmas, exec_list) {
978                 int offset = vma->exec_entry - exec;
979                 ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
980                                                             reloc + reloc_offset[offset]);
981                 if (ret)
982                         goto err;
983         }
984
985         /* Leave the user relocations as they are; this is the painfully slow path,
986          * and we want to avoid the complication of dropping the lock whilst
987          * having buffers reserved in the aperture and so causing spurious
988          * ENOSPC for random operations.
989          */
990
991 err:
992         drm_free_large(reloc);
993         drm_free_large(reloc_offset);
994         return ret;
995 }
996
997 static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
998 {
999         unsigned int mask;
1000
1001         mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
1002         mask <<= I915_BO_ACTIVE_SHIFT;
1003
1004         return mask;
1005 }
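
/*
 * obj->flags carries a per-engine active bitmask starting at
 * I915_BO_ACTIVE_SHIFT; eb_other_engines() builds a mask covering every
 * engine except the one this request runs on, so move_to_gpu() only
 * needs to call i915_gem_object_sync() for objects still busy on a
 * different engine.
 */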
1006
1007 static int
1008 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
1009                                 struct list_head *vmas)
1010 {
1011         const unsigned int other_rings = eb_other_engines(req);
1012         struct i915_vma *vma;
1013         uint32_t flush_domains = 0;
1014         bool flush_chipset = false;
1015         int ret;
1016
1017         list_for_each_entry(vma, vmas, exec_list) {
1018                 struct drm_i915_gem_object *obj = vma->obj;
1019
1020                 if (obj->flags & other_rings) {
1021                         ret = i915_gem_object_sync(obj, req);
1022                         if (ret)
1023                                 return ret;
1024                 }
1025
1026                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
1027                         flush_chipset |= i915_gem_clflush_object(obj, false);
1028
1029                 flush_domains |= obj->base.write_domain;
1030         }
1031
1032         if (flush_chipset)
1033                 i915_gem_chipset_flush(req->engine->i915);
1034
1035         if (flush_domains & I915_GEM_DOMAIN_GTT)
1036                 wmb();
1037
1038         /* Unconditionally invalidate GPU caches and TLBs. */
1039         return req->engine->emit_flush(req, EMIT_INVALIDATE);
1040 }
1041
1042 static bool
1043 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1044 {
1045         if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
1046                 return false;
1047
1048         /* Kernel clipping was a DRI1 misfeature */
1049         if (exec->num_cliprects || exec->cliprects_ptr)
1050                 return false;
1051
1052         if (exec->DR4 == 0xffffffff) {
1053                 DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1054                 exec->DR4 = 0;
1055         }
1056         if (exec->DR1 || exec->DR4)
1057                 return false;
1058
1059         if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1060                 return false;
1061
1062         return true;
1063 }
1064
1065 static int
1066 validate_exec_list(struct drm_device *dev,
1067                    struct drm_i915_gem_exec_object2 *exec,
1068                    int count)
1069 {
1070         unsigned relocs_total = 0;
1071         unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1072         unsigned invalid_flags;
1073         int i;
1074
1075         /* INTERNAL flags must not overlap with external ones */
1076         BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1077
1078         invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1079         if (USES_FULL_PPGTT(dev))
1080                 invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1081
1082         for (i = 0; i < count; i++) {
1083                 char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1084                 int length; /* limited by fault_in_pages_readable() */
1085
1086                 if (exec[i].flags & invalid_flags)
1087                         return -EINVAL;
1088
1089                 /* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1090                  * any non-page-aligned or non-canonical addresses.
1091                  */
1092                 if (exec[i].flags & EXEC_OBJECT_PINNED) {
1093                         if (exec[i].offset !=
1094                             gen8_canonical_addr(exec[i].offset & PAGE_MASK))
1095                                 return -EINVAL;
1096
1097                         /* From the drm_mm perspective the address space is continuous,
1098                          * so from this point we're always using non-canonical
1099                          * form internally.
1100                          */
1101                         exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1102                 }
1103
1104                 if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1105                         return -EINVAL;
1106
1107                 /* pad_to_size was once a reserved field, so sanitize it */
1108                 if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1109                         if (offset_in_page(exec[i].pad_to_size))
1110                                 return -EINVAL;
1111                 } else {
1112                         exec[i].pad_to_size = 0;
1113                 }
1114
1115                 /* First check for malicious input causing overflow in
1116                  * the worst case where we need to allocate the entire
1117                  * relocation tree as a single array.
1118                  */
1119                 if (exec[i].relocation_count > relocs_max - relocs_total)
1120                         return -EINVAL;
1121                 relocs_total += exec[i].relocation_count;
1122
1123                 length = exec[i].relocation_count *
1124                         sizeof(struct drm_i915_gem_relocation_entry);
1125                 /*
1126                  * We must check that the entire relocation array is safe
1127                  * to read, but since we may need to update the presumed
1128                  * offsets during execution, check for full write access.
1129                  */
1130                 if (!access_ok(VERIFY_WRITE, ptr, length))
1131                         return -EFAULT;
1132
1133                 if (likely(!i915.prefault_disable)) {
1134                         if (fault_in_multipages_readable(ptr, length))
1135                                 return -EFAULT;
1136                 }
1137         }
1138
1139         return 0;
1140 }
1141
1142 static struct i915_gem_context *
1143 i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1144                           struct intel_engine_cs *engine, const u32 ctx_id)
1145 {
1146         struct i915_gem_context *ctx = NULL;
1147         struct i915_ctx_hang_stats *hs;
1148
1149         if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
1150                 return ERR_PTR(-EINVAL);
1151
1152         ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
1153         if (IS_ERR(ctx))
1154                 return ctx;
1155
1156         hs = &ctx->hang_stats;
1157         if (hs->banned) {
1158                 DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1159                 return ERR_PTR(-EIO);
1160         }
1161
1162         return ctx;
1163 }
1164
1165 void i915_vma_move_to_active(struct i915_vma *vma,
1166                              struct drm_i915_gem_request *req,
1167                              unsigned int flags)
1168 {
1169         struct drm_i915_gem_object *obj = vma->obj;
1170         const unsigned int idx = req->engine->id;
1171
1172         GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1173
1174         obj->dirty = 1; /* be paranoid  */
1175
1176         /* Add a reference if we're newly entering the active list.
1177          * The order in which we add operations to the retirement queue is
1178          * vital here: mark_active adds to the start of the callback list,
1179          * such that subsequent callbacks are called first. Therefore we
1180          * add the active reference first and queue for it to be dropped
1181          * *last*.
1182          */
1183         if (!i915_gem_object_is_active(obj))
1184                 i915_gem_object_get(obj);
1185         i915_gem_object_set_active(obj, idx);
1186         i915_gem_active_set(&obj->last_read[idx], req);
1187
1188         if (flags & EXEC_OBJECT_WRITE) {
1189                 i915_gem_active_set(&obj->last_write, req);
1190
1191                 intel_fb_obj_invalidate(obj, ORIGIN_CS);
1192
1193                 /* update for the implicit flush after a batch */
1194                 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1195         }
1196
1197         if (flags & EXEC_OBJECT_NEEDS_FENCE) {
1198                 i915_gem_active_set(&obj->last_fence, req);
1199                 if (flags & __EXEC_OBJECT_HAS_FENCE) {
1200                         struct drm_i915_private *dev_priv = req->i915;
1201
1202                         list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1203                                        &dev_priv->mm.fence_list);
1204                 }
1205         }
1206
1207         i915_vma_set_active(vma, idx);
1208         i915_gem_active_set(&vma->last_read[idx], req);
1209         list_move_tail(&vma->vm_link, &vma->vm->active_list);
1210 }
1211
1212 static void eb_export_fence(struct drm_i915_gem_object *obj,
1213                             struct drm_i915_gem_request *req,
1214                             unsigned int flags)
1215 {
1216         struct reservation_object *resv;
1217
1218         resv = i915_gem_object_get_dmabuf_resv(obj);
1219         if (!resv)
1220                 return;
1221
1222         /* Ignore errors from failing to allocate the new fence; we can't
1223          * handle an error right now. Worst case should be missed
1224          * synchronisation leading to rendering corruption.
1225          */
1226         ww_mutex_lock(&resv->lock, NULL);
1227         if (flags & EXEC_OBJECT_WRITE)
1228                 reservation_object_add_excl_fence(resv, &req->fence);
1229         else if (reservation_object_reserve_shared(resv) == 0)
1230                 reservation_object_add_shared_fence(resv, &req->fence);
1231         ww_mutex_unlock(&resv->lock);
1232 }
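
/*
 * eb_export_fence() is the core of this change: the request is published
 * on the dma-buf's reservation object, either as the exclusive fence for
 * writes or as a shared fence for reads, so other drivers importing the
 * buffer can synchronise against our rendering. Purely as an
 * illustration (not part of this file), an importer could block on the
 * exported fences with something like:
 *
 *	reservation_object_wait_timeout_rcu(resv, true, true,
 *					    MAX_SCHEDULE_TIMEOUT);
 */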
1233
1234 static void
1235 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1236                                    struct drm_i915_gem_request *req)
1237 {
1238         struct i915_vma *vma;
1239
1240         list_for_each_entry(vma, vmas, exec_list) {
1241                 struct drm_i915_gem_object *obj = vma->obj;
1242                 u32 old_read = obj->base.read_domains;
1243                 u32 old_write = obj->base.write_domain;
1244
1245                 obj->base.write_domain = obj->base.pending_write_domain;
1246                 if (obj->base.write_domain)
1247                         vma->exec_entry->flags |= EXEC_OBJECT_WRITE;
1248                 else
1249                         obj->base.pending_read_domains |= obj->base.read_domains;
1250                 obj->base.read_domains = obj->base.pending_read_domains;
1251
1252                 i915_vma_move_to_active(vma, req, vma->exec_entry->flags);
1253                 eb_export_fence(obj, req, vma->exec_entry->flags);
1254                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
1255         }
1256 }
1257
1258 static int
1259 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1260 {
1261         struct intel_ring *ring = req->ring;
1262         int ret, i;
1263
1264         if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1265                 DRM_DEBUG("sol reset is gen7/rcs only\n");
1266                 return -EINVAL;
1267         }
1268
1269         ret = intel_ring_begin(req, 4 * 3);
1270         if (ret)
1271                 return ret;
1272
1273         for (i = 0; i < 4; i++) {
1274                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1275                 intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1276                 intel_ring_emit(ring, 0);
1277         }
1278
1279         intel_ring_advance(ring);
1280
1281         return 0;
1282 }
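
/*
 * Each of the four GEN7_SO_WRITE_OFFSET registers is cleared with a
 * three-dword MI_LOAD_REGISTER_IMM (header, register, value), which is
 * why intel_ring_begin() reserves 4 * 3 dwords above.
 */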
1283
1284 static struct i915_vma*
1285 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1286                           struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1287                           struct drm_i915_gem_object *batch_obj,
1288                           struct eb_vmas *eb,
1289                           u32 batch_start_offset,
1290                           u32 batch_len,
1291                           bool is_master)
1292 {
1293         struct drm_i915_gem_object *shadow_batch_obj;
1294         struct i915_vma *vma;
1295         int ret;
1296
1297         shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
1298                                                    PAGE_ALIGN(batch_len));
1299         if (IS_ERR(shadow_batch_obj))
1300                 return ERR_CAST(shadow_batch_obj);
1301
1302         ret = intel_engine_cmd_parser(engine,
1303                                       batch_obj,
1304                                       shadow_batch_obj,
1305                                       batch_start_offset,
1306                                       batch_len,
1307                                       is_master);
1308         if (ret)
1309                 goto err;
1310
1311         ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1312         if (ret)
1313                 goto err;
1314
1315         i915_gem_object_unpin_pages(shadow_batch_obj);
1316
1317         memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1318
1319         vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1320         vma->exec_entry = shadow_exec_entry;
1321         vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1322         i915_gem_object_get(shadow_batch_obj);
1323         list_add_tail(&vma->exec_list, &eb->vmas);
1324
1325         return vma;
1326
1327 err:
1328         i915_gem_object_unpin_pages(shadow_batch_obj);
1329         if (ret == -EACCES) /* unhandled chained batch */
1330                 return NULL;
1331         else
1332                 return ERR_PTR(ret);
1333 }
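
/*
 * i915_gem_execbuffer_parse() runs the command parser over the user
 * batch into a shadow buffer taken from the engine's batch pool, pins
 * the shadow in the GGTT and adds it to the eb list. A NULL return
 * (from -EACCES, an unhandled chained batch) tells the caller to fall
 * back to executing the original, unparsed batch.
 */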
1334
1335 static int
1336 execbuf_submit(struct i915_execbuffer_params *params,
1337                struct drm_i915_gem_execbuffer2 *args,
1338                struct list_head *vmas)
1339 {
1340         struct drm_i915_private *dev_priv = params->request->i915;
1341         u64 exec_start, exec_len;
1342         int instp_mode;
1343         u32 instp_mask;
1344         int ret;
1345
1346         ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1347         if (ret)
1348                 return ret;
1349
1350         ret = i915_switch_context(params->request);
1351         if (ret)
1352                 return ret;
1353
1354         instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1355         instp_mask = I915_EXEC_CONSTANTS_MASK;
1356         switch (instp_mode) {
1357         case I915_EXEC_CONSTANTS_REL_GENERAL:
1358         case I915_EXEC_CONSTANTS_ABSOLUTE:
1359         case I915_EXEC_CONSTANTS_REL_SURFACE:
1360                 if (instp_mode != 0 && params->engine->id != RCS) {
1361                         DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1362                         return -EINVAL;
1363                 }
1364
1365                 if (instp_mode != dev_priv->relative_constants_mode) {
1366                         if (INTEL_INFO(dev_priv)->gen < 4) {
1367                                 DRM_DEBUG("no rel constants on pre-gen4\n");
1368                                 return -EINVAL;
1369                         }
1370
1371                         if (INTEL_INFO(dev_priv)->gen > 5 &&
1372                             instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1373                                 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1374                                 return -EINVAL;
1375                         }
1376
1377                         /* The HW changed the meaning of this bit on gen6 */
1378                         if (INTEL_INFO(dev_priv)->gen >= 6)
1379                                 instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1380                 }
1381                 break;
1382         default:
1383                 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1384                 return -EINVAL;
1385         }
1386
1387         if (params->engine->id == RCS &&
1388             instp_mode != dev_priv->relative_constants_mode) {
1389                 struct intel_ring *ring = params->request->ring;
1390
1391                 ret = intel_ring_begin(params->request, 4);
1392                 if (ret)
1393                         return ret;
1394
1395                 intel_ring_emit(ring, MI_NOOP);
1396                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1397                 intel_ring_emit_reg(ring, INSTPM);
1398                 intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1399                 intel_ring_advance(ring);
1400
1401                 dev_priv->relative_constants_mode = instp_mode;
1402         }
1403
1404         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1405                 ret = i915_reset_gen7_sol_offsets(params->request);
1406                 if (ret)
1407                         return ret;
1408         }
1409
1410         exec_len   = args->batch_len;
1411         exec_start = params->batch->node.start +
1412                      params->args_batch_start_offset;
1413
1414         if (exec_len == 0)
1415                 exec_len = params->batch->size;
1416
1417         ret = params->engine->emit_bb_start(params->request,
1418                                             exec_start, exec_len,
1419                                             params->dispatch_flags);
1420         if (ret)
1421                 return ret;
1422
1423         trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
1424
1425         i915_gem_execbuffer_move_to_active(vmas, params->request);
1426
1427         return 0;
1428 }
1429
1430 /**
1431  * Find one BSD engine to dispatch the corresponding BSD command to.
1432  * The selected engine index is returned and cached per file.
1433  */
1434 static unsigned int
1435 gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1436                          struct drm_file *file)
1437 {
1438         struct drm_i915_file_private *file_priv = file->driver_priv;
1439
1440         /* Check whether the file_priv has already selected one ring. */
1441         if ((int)file_priv->bsd_engine < 0) {
1442                 /* If not, use the ping-pong mechanism to select one. */
1443                 mutex_lock(&dev_priv->drm.struct_mutex);
1444                 file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index;
1445                 dev_priv->mm.bsd_engine_dispatch_index ^= 1;
1446                 mutex_unlock(&dev_priv->drm.struct_mutex);
1447         }
1448
1449         return file_priv->bsd_engine;
1450 }
1451
1452 #define I915_USER_RINGS (4)
1453
1454 static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1455         [I915_EXEC_DEFAULT]     = RCS,
1456         [I915_EXEC_RENDER]      = RCS,
1457         [I915_EXEC_BLT]         = BCS,
1458         [I915_EXEC_BSD]         = VCS,
1459         [I915_EXEC_VEBOX]       = VECS
1460 };
1461
1462 static struct intel_engine_cs *
1463 eb_select_engine(struct drm_i915_private *dev_priv,
1464                  struct drm_file *file,
1465                  struct drm_i915_gem_execbuffer2 *args)
1466 {
1467         unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1468         struct intel_engine_cs *engine;
1469
1470         if (user_ring_id > I915_USER_RINGS) {
1471                 DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1472                 return NULL;
1473         }
1474
1475         if ((user_ring_id != I915_EXEC_BSD) &&
1476             ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1477                 DRM_DEBUG("execbuf with non-BSD ring but with BSD "
1478                           "dispatch flags: %d\n", (int)(args->flags));
1479                 return NULL;
1480         }
1481
1482         if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1483                 unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1484
1485                 if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1486                         bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
1487                 } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1488                            bsd_idx <= I915_EXEC_BSD_RING2) {
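                             /*
                              * Explicit selection: convert the
                              * I915_EXEC_BSD_RING1/2 flag value into a
                              * zero-based VCS engine index.
                              */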
1489                         bsd_idx >>= I915_EXEC_BSD_SHIFT;
1490                         bsd_idx--;
1491                 } else {
1492                         DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1493                                   bsd_idx);
1494                         return NULL;
1495                 }
1496
1497                 engine = &dev_priv->engine[_VCS(bsd_idx)];
1498         } else {
1499                 engine = &dev_priv->engine[user_ring_map[user_ring_id]];
1500         }
1501
1502         if (!intel_engine_initialized(engine)) {
1503                 DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1504                 return NULL;
1505         }
1506
1507         return engine;
1508 }
1509
1510 static int
1511 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1512                        struct drm_file *file,
1513                        struct drm_i915_gem_execbuffer2 *args,
1514                        struct drm_i915_gem_exec_object2 *exec)
1515 {
1516         struct drm_i915_private *dev_priv = to_i915(dev);
1517         struct i915_ggtt *ggtt = &dev_priv->ggtt;
1518         struct eb_vmas *eb;
1519         struct drm_i915_gem_exec_object2 shadow_exec_entry;
1520         struct intel_engine_cs *engine;
1521         struct i915_gem_context *ctx;
1522         struct i915_address_space *vm;
1523         struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1524         struct i915_execbuffer_params *params = &params_master;
1525         const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1526         u32 dispatch_flags;
1527         int ret;
1528         bool need_relocs;
1529
1530         if (!i915_gem_check_execbuffer(args))
1531                 return -EINVAL;
1532
1533         ret = validate_exec_list(dev, exec, args->buffer_count);
1534         if (ret)
1535                 return ret;
1536
1537         dispatch_flags = 0;
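             /*
              * Secure dispatch executes the batch with elevated privileges,
              * so it is restricted to the DRM master with CAP_SYS_ADMIN.
              */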
1538         if (args->flags & I915_EXEC_SECURE) {
1539                 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1540                         return -EPERM;
1541
1542                 dispatch_flags |= I915_DISPATCH_SECURE;
1543         }
1544         if (args->flags & I915_EXEC_IS_PINNED)
1545                 dispatch_flags |= I915_DISPATCH_PINNED;
1546
1547         engine = eb_select_engine(dev_priv, file, args);
1548         if (!engine)
1549                 return -EINVAL;
1550
1551         if (args->buffer_count < 1) {
1552                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1553                 return -EINVAL;
1554         }
1555
1556         if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1557                 if (!HAS_RESOURCE_STREAMER(dev)) {
1558                         DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1559                         return -EINVAL;
1560                 }
1561                 if (engine->id != RCS) {
1562                         DRM_DEBUG("RS is not available on %s\n",
1563                                  engine->name);
1564                         return -EINVAL;
1565                 }
1566
1567                 dispatch_flags |= I915_DISPATCH_RS;
1568         }
1569
1570         /* Take a local wakeref for preparing to dispatch the execbuf as
1571          * we expect to access the hardware fairly frequently in the
1572          * process. Upon first dispatch, we acquire another prolonged
1573          * wakeref that we hold until the GPU has been idle for at least
1574          * 100ms.
1575          */
1576         intel_runtime_pm_get(dev_priv);
1577
1578         ret = i915_mutex_lock_interruptible(dev);
1579         if (ret)
1580                 goto pre_mutex_err;
1581
1582         ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
1583         if (IS_ERR(ctx)) {
1584                 mutex_unlock(&dev->struct_mutex);
1585                 ret = PTR_ERR(ctx);
1586                 goto pre_mutex_err;
1587         }
1588
1589         i915_gem_context_get(ctx);
1590
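             /* Run in the context's ppgtt if it has one, else in the global GTT. */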
1591         if (ctx->ppgtt)
1592                 vm = &ctx->ppgtt->base;
1593         else
1594                 vm = &ggtt->base;
1595
1596         memset(&params_master, 0x00, sizeof(params_master));
1597
1598         eb = eb_create(args);
1599         if (eb == NULL) {
1600                 i915_gem_context_put(ctx);
1601                 mutex_unlock(&dev->struct_mutex);
1602                 ret = -ENOMEM;
1603                 goto pre_mutex_err;
1604         }
1605
1606         /* Look up object handles */
1607         ret = eb_lookup_vmas(eb, exec, args, vm, file);
1608         if (ret)
1609                 goto err;
1610
1611         /* take note of the batch buffer before we might reorder the lists */
1612         params->batch = eb_get_batch(eb);
1613
1614         /* Move the objects en-masse into the GTT, evicting if necessary. */
1615         need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1616         ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
1617                                           &need_relocs);
1618         if (ret)
1619                 goto err;
1620
1621         /* The objects are in their final locations, apply the relocations. */
1622         if (need_relocs)
1623                 ret = i915_gem_execbuffer_relocate(eb);
1624         if (ret) {
1625                 if (ret == -EFAULT) {
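                             /*
                              * Faulted on the relocation entries while
                              * holding the mutex: retry via the slow path,
                              * which copies them from userspace with the
                              * lock dropped.
                              */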
1626                         ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
1627                                                                 engine,
1628                                                                 eb, exec, ctx);
1629                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1630                 }
1631                 if (ret)
1632                         goto err;
1633         }
1634
1635         /* Reject batches the GPU would write to (self-modifying). */
1636         if (params->batch->obj->base.pending_write_domain) {
1637                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1638                 ret = -EINVAL;
1639                 goto err;
1640         }
1641
1642         params->args_batch_start_offset = args->batch_start_offset;
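             /*
              * Engines with an active command parser scan the batch into a
              * shadow buffer; if the batch is accepted, the shadow vma
              * replaces the user's batch below.
              */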
1643         if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
1644                 struct i915_vma *vma;
1645
1646                 vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
1647                                                 params->batch->obj,
1648                                                 eb,
1649                                                 args->batch_start_offset,
1650                                                 args->batch_len,
1651                                                 drm_is_current_master(file));
1652                 if (IS_ERR(vma)) {
1653                         ret = PTR_ERR(vma);
1654                         goto err;
1655                 }
1656
1657                 if (vma) {
1658                         /*
1659                          * Batch parsed and accepted:
1660                          *
1661                          * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1662                          * bit from MI_BATCH_BUFFER_START commands issued in
1663                          * the dispatch_execbuffer implementations. We
1664                          * specifically don't want that set on batches the
1665                          * command parser has accepted.
1666                          */
1667                         dispatch_flags |= I915_DISPATCH_SECURE;
1668                         params->args_batch_start_offset = 0;
1669                         params->batch = vma;
1670                 }
1671         }
1672
1673         params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1674
1675         /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1676          * batch" bit. Hence we need to pin secure batches into the global gtt.
1677          * hsw should have this fixed, but bdw mucks it up again. */
1678         if (dispatch_flags & I915_DISPATCH_SECURE) {
1679                 struct drm_i915_gem_object *obj = params->batch->obj;
1680
1681                 /*
1682                  * So on first glance it looks freaky that we pin the batch here
1683                  * outside of the reservation loop. But:
1684                  * - The batch is already pinned into the relevant ppgtt, so we
1685                  *   already have the backing storage fully allocated.
1686                  * - No other BO uses the global gtt (well contexts, but meh),
1687                  *   so we don't really have issues with multiple objects not
1688                  *   fitting due to fragmentation.
1689                  * So this is actually safe.
1690                  */
1691                 ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
1692                 if (ret)
1693                         goto err;
1694
1695                 params->batch = i915_gem_obj_to_ggtt(obj);
1696         }
1697
1698         /* Allocate a request for this batch buffer nice and early. */
1699         params->request = i915_gem_request_alloc(engine, ctx);
1700         if (IS_ERR(params->request)) {
1701                 ret = PTR_ERR(params->request);
1702                 goto err_batch_unpin;
1703         }
1704
1705         ret = i915_gem_request_add_to_client(params->request, file);
1706         if (ret)
1707                 goto err_request;
1708
1709         /*
1710          * Save assorted stuff away to pass through to execbuf_submit().
1711          * NB: This data should be 'persistent' and not local as it will be
1712          * kept around beyond the duration of the IOCTL once the GPU
1713          * scheduler arrives.
1714          */
1715         params->dev                     = dev;
1716         params->file                    = file;
1717         params->engine                  = engine;
1718         params->dispatch_flags          = dispatch_flags;
1719         params->ctx                     = ctx;
1720
1721         ret = execbuf_submit(params, args, &eb->vmas);
1722 err_request:
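             /*
              * The request is always added once allocated, even if the
              * submission above failed, so that it is tracked and retired
              * normally.
              */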
1723         __i915_add_request(params->request, params->batch->obj, ret == 0);
1724
1725 err_batch_unpin:
1726         /*
1727          * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1728          * batch vma for correctness. For less ugly and less fragility this
1729          * needs to be adjusted to also track the ggtt batch vma properly as
1730          * active.
1731          */
1732         if (dispatch_flags & I915_DISPATCH_SECURE)
1733                 i915_vma_unpin(params->batch);
1734 err:
1735         /* the request owns the ref now */
1736         i915_gem_context_put(ctx);
1737         eb_destroy(eb);
1738
1739         mutex_unlock(&dev->struct_mutex);
1740
1741 pre_mutex_err:
1742         /* intel_gpu_busy should also get a ref, so it will be freed when the
1743          * device is really idle. */
1744         intel_runtime_pm_put(dev_priv);
1745         return ret;
1746 }
1747
1748 /*
1749  * Legacy execbuffer just creates an exec2 list from the original exec object
1750  * list array and passes it to the real function.
1751  */
1752 int
1753 i915_gem_execbuffer(struct drm_device *dev, void *data,
1754                     struct drm_file *file)
1755 {
1756         struct drm_i915_gem_execbuffer *args = data;
1757         struct drm_i915_gem_execbuffer2 exec2;
1758         struct drm_i915_gem_exec_object *exec_list = NULL;
1759         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1760         int ret, i;
1761
1762         if (args->buffer_count < 1) {
1763                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1764                 return -EINVAL;
1765         }
1766
1767         /* Copy in the exec list from userland */
1768         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1769         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1770         if (exec_list == NULL || exec2_list == NULL) {
1771                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1772                           args->buffer_count);
1773                 drm_free_large(exec_list);
1774                 drm_free_large(exec2_list);
1775                 return -ENOMEM;
1776         }
1777         ret = copy_from_user(exec_list,
1778                              u64_to_user_ptr(args->buffers_ptr),
1779                              sizeof(*exec_list) * args->buffer_count);
1780         if (ret != 0) {
1781                 DRM_DEBUG("copy %d exec entries failed %d\n",
1782                           args->buffer_count, ret);
1783                 drm_free_large(exec_list);
1784                 drm_free_large(exec2_list);
1785                 return -EFAULT;
1786         }
1787
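             /*
              * Convert the legacy exec objects into the exec2 layout; on
              * pre-gen4 hardware every object is marked as needing a fence.
              */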
1788         for (i = 0; i < args->buffer_count; i++) {
1789                 exec2_list[i].handle = exec_list[i].handle;
1790                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1791                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1792                 exec2_list[i].alignment = exec_list[i].alignment;
1793                 exec2_list[i].offset = exec_list[i].offset;
1794                 if (INTEL_INFO(dev)->gen < 4)
1795                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1796                 else
1797                         exec2_list[i].flags = 0;
1798         }
1799
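             /*
              * The legacy ioctl always targets the render ring and the
              * default context.
              */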
1800         exec2.buffers_ptr = args->buffers_ptr;
1801         exec2.buffer_count = args->buffer_count;
1802         exec2.batch_start_offset = args->batch_start_offset;
1803         exec2.batch_len = args->batch_len;
1804         exec2.DR1 = args->DR1;
1805         exec2.DR4 = args->DR4;
1806         exec2.num_cliprects = args->num_cliprects;
1807         exec2.cliprects_ptr = args->cliprects_ptr;
1808         exec2.flags = I915_EXEC_RENDER;
1809         i915_execbuffer2_set_context_id(exec2, 0);
1810
1811         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1812         if (!ret) {
1813                 struct drm_i915_gem_exec_object __user *user_exec_list =
1814                         u64_to_user_ptr(args->buffers_ptr);
1815
1816                 /* Copy the new buffer offsets back to the user's exec list. */
1817                 for (i = 0; i < args->buffer_count; i++) {
1818                         exec2_list[i].offset =
1819                                 gen8_canonical_addr(exec2_list[i].offset);
1820                         ret = __copy_to_user(&user_exec_list[i].offset,
1821                                              &exec2_list[i].offset,
1822                                              sizeof(user_exec_list[i].offset));
1823                         if (ret) {
1824                                 ret = -EFAULT;
1825                                 DRM_DEBUG("failed to copy %d exec entries "
1826                                           "back to user (%d)\n",
1827                                           args->buffer_count, ret);
1828                                 break;
1829                         }
1830                 }
1831         }
1832
1833         drm_free_large(exec_list);
1834         drm_free_large(exec2_list);
1835         return ret;
1836 }
1837
1838 int
1839 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1840                      struct drm_file *file)
1841 {
1842         struct drm_i915_gem_execbuffer2 *args = data;
1843         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1844         int ret;
1845
1846         if (args->buffer_count < 1 ||
1847             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1848                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1849                 return -EINVAL;
1850         }
1851
1852         if (args->rsvd2 != 0) {
1853                 DRM_DEBUG("dirty rsvd2 field\n");
1854                 return -EINVAL;
1855         }
1856
1857         exec2_list = drm_malloc_gfp(args->buffer_count,
1858                                     sizeof(*exec2_list),
1859                                     GFP_TEMPORARY);
1860         if (exec2_list == NULL) {
1861                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1862                           args->buffer_count);
1863                 return -ENOMEM;
1864         }
1865         ret = copy_from_user(exec2_list,
1866                              u64_to_user_ptr(args->buffers_ptr),
1867                              sizeof(*exec2_list) * args->buffer_count);
1868         if (ret != 0) {
1869                 DRM_DEBUG("copy %d exec entries failed %d\n",
1870                           args->buffer_count, ret);
1871                 drm_free_large(exec2_list);
1872                 return -EFAULT;
1873         }
1874
1875         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1876         if (!ret) {
1877                 /* Copy the new buffer offsets back to the user's exec list. */
1878                 struct drm_i915_gem_exec_object2 __user *user_exec_list =
1879                                    u64_to_user_ptr(args->buffers_ptr);
1880                 int i;
1881
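                     /*
                      * Offsets are converted to the canonical (sign-extended)
                      * address form before being reported back to userspace.
                      */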
1882                 for (i = 0; i < args->buffer_count; i++) {
1883                         exec2_list[i].offset =
1884                                 gen8_canonical_addr(exec2_list[i].offset);
1885                         ret = __copy_to_user(&user_exec_list[i].offset,
1886                                              &exec2_list[i].offset,
1887                                              sizeof(user_exec_list[i].offset));
1888                         if (ret) {
1889                                 ret = -EFAULT;
1890                                 DRM_DEBUG("failed to copy %d exec entries "
1891                                           "back to user\n",
1892                                           args->buffer_count);
1893                                 break;
1894                         }
1895                 }
1896         }
1897
1898         drm_free_large(exec2_list);
1899         return ret;
1900 }