X-Git-Url: http://git.cascardo.info/?p=cascardo%2Flinux.git;a=blobdiff_plain;f=drivers%2Fgpu%2Fdrm%2Fi915%2Fi915_gem_execbuffer.c;h=7adb4c77cc7f449698f0207d5f51cbf037cff179;hp=b35e5b6475b245a9061a5437c919e1716d409894;hb=6b25e21fa6f26d0f0d45f161d169029411c84286;hpb=c1fd2794a4111501027dc60b3fd9d2a29789ea47 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b35e5b6475b2..7adb4c77cc7f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,22 +26,42 @@ * */ +#include +#include +#include + #include #include + #include "i915_drv.h" +#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" -#include -#include +#include "intel_frontbuffer.h" -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) -#define __EXEC_OBJECT_NEEDS_MAP (1<<29) -#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ + +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) +#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ #define BATCH_OFFSET_BIAS (256*1024) +struct i915_execbuffer_params { + struct drm_device *dev; + struct drm_file *file; + struct i915_vma *batch; + u32 dispatch_flags; + u32 args_batch_start_offset; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct drm_i915_gem_request *request; +}; + struct eb_vmas { + struct drm_i915_private *i915; struct list_head vmas; int and; union { @@ -51,7 +71,8 @@ struct eb_vmas { }; static struct eb_vmas * -eb_create(struct drm_i915_gem_execbuffer2 *args) +eb_create(struct drm_i915_private *i915, + struct drm_i915_gem_execbuffer2 *args) { struct eb_vmas *eb = NULL; @@ -78,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args) } else eb->and = -args->buffer_count; + eb->i915 = i915; INIT_LIST_HEAD(&eb->vmas); return eb; } @@ -89,6 +111,26 @@ eb_reset(struct eb_vmas *eb) memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); } +static struct i915_vma * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + + return vma; +} + static int eb_lookup_vmas(struct eb_vmas *eb, struct drm_i915_gem_exec_object2 *exec, @@ -122,7 +164,7 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - drm_gem_object_reference(&obj->base); + i915_gem_object_get(obj); list_add_tail(&obj->obj_exec_link, &objects); } spin_unlock(&file->table_lock); @@ -143,8 +185,8 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm); - if (IS_ERR(vma)) { + vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL); + if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); goto err; @@ -175,7 +217,7 @@ err: struct drm_i915_gem_object, obj_exec_link); list_del_init(&obj->obj_exec_link); - drm_gem_object_unreference(&obj->base); + i915_gem_object_put(obj); } /* * Objects already transfered to the vmas list will be unreferenced by @@ -208,7 +250,6 @@ static void i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry; - struct drm_i915_gem_object *obj = vma->obj; if (!drm_mm_node_allocated(&vma->node)) return; @@ -216,10 +257,10 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) entry = vma->exec_entry; if (entry->flags & __EXEC_OBJECT_HAS_FENCE) - i915_gem_object_unpin_fence(obj); + i915_vma_unpin_fence(vma); if (entry->flags & __EXEC_OBJECT_HAS_PIN) - vma->pin_count--; + __i915_vma_unpin(vma); entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); } @@ -234,13 +275,19 @@ static void eb_destroy(struct eb_vmas *eb) exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - drm_gem_object_unreference(&vma->obj->base); + i915_vma_put(vma); } kfree(eb); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_USE_CPU_RELOC) + return DBG_USE_CPU_RELOC > 0; + return (HAS_LLC(obj->base.dev) || obj->base.write_domain == I915_GEM_DOMAIN_CPU || obj->cache_level != I915_CACHE_NONE); @@ -265,144 +312,265 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address) } static inline uint64_t -relocation_target(struct drm_i915_gem_relocation_entry *reloc, +relocation_target(const struct drm_i915_gem_relocation_entry *reloc, uint64_t target_offset) { return gen8_canonical_addr((int)reloc->delta + target_offset); } -static int -relocate_entry_cpu(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +struct reloc_cache { + struct drm_i915_private *i915; + struct drm_mm_node node; + unsigned long vaddr; + unsigned int page; + bool use_64bit_reloc; +}; + +static void reloc_cache_init(struct reloc_cache *cache, + struct drm_i915_private *i915) { - struct drm_device *dev = obj->base.dev; - uint32_t page_offset = offset_in_page(reloc->offset); - uint64_t delta = relocation_target(reloc, target_offset); - char *vaddr; - int ret; + cache->page = -1; + cache->vaddr = 0; + cache->i915 = i915; + cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; + cache->node.allocated = false; +} - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - return ret; +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} + +static inline unsigned int unmask_flags(unsigned long p) +{ + return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static void reloc_cache_fini(struct reloc_cache *cache) +{ + void *vaddr; - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); + if (!cache->vaddr) + return; - if (INTEL_INFO(dev)->gen >= 8) { - page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + if (cache->vaddr & CLFLUSH_AFTER) + mb(); - if (page_offset == 0) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); + kunmap_atomic(vaddr); + i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm); + } else { + wmb(); + io_mapping_unmap_atomic((void __iomem *)vaddr); + if (cache->node.allocated) { + struct i915_ggtt *ggtt = &cache->i915->ggtt; + + ggtt->base.clear_range(&ggtt->base, + cache->node.start, + cache->node.size, + true); + drm_mm_remove_node(&cache->node); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); } + } +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) +{ + void *vaddr; + + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int ret; + + ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); + if (ret) + return ERR_PTR(ret); + + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); - *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); } - kunmap_atomic(vaddr); + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = page; - return 0; + return vaddr; } -static int -relocate_entry_gtt(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint64_t delta = relocation_target(reloc, target_offset); - uint64_t offset; - void __iomem *reloc_page; - int ret; + struct i915_ggtt *ggtt = &cache->i915->ggtt; + unsigned long offset; + void *vaddr; + + if (cache->node.allocated) { + wmb(); + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, page), + cache->node.start, I915_CACHE_NONE, 0); + cache->page = page; + return unmask_page(cache->vaddr); + } - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; + if (cache->vaddr) { + io_mapping_unmap_atomic(unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int ret; - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; + if (use_cpu_reloc(obj)) + return NULL; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ERR_PTR(ret); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); + if (IS_ERR(vma)) { + memset(&cache->node, 0, sizeof(cache->node)); + ret = drm_mm_insert_node_in_range_generic + (&ggtt->base.mm, &cache->node, + 4096, 0, 0, + 0, ggtt->mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); + if (ret) /* no inactive aperture space, use cpu reloc */ + return NULL; + } else { + ret = i915_vma_put_fence(vma); + if (ret) { + i915_vma_unpin(vma); + return ERR_PTR(ret); + } - /* Map the page containing the relocation we're going to perform. */ - offset = i915_gem_obj_ggtt_offset(obj); - offset += reloc->offset; - reloc_page = io_mapping_map_atomic_wc(ggtt->mappable, - offset & PAGE_MASK); - iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); - - if (INTEL_INFO(dev)->gen >= 8) { - offset += sizeof(uint32_t); - - if (offset_in_page(offset) == 0) { - io_mapping_unmap_atomic(reloc_page); - reloc_page = - io_mapping_map_atomic_wc(ggtt->mappable, - offset); + cache->node.start = vma->node.start; + cache->node.mm = (void *)vma; } + } - iowrite32(upper_32_bits(delta), - reloc_page + offset_in_page(offset)); + offset = cache->node.start; + if (cache->node.allocated) { + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); + } else { + offset += page << PAGE_SHIFT; } - io_mapping_unmap_atomic(reloc_page); + vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset); + cache->page = page; + cache->vaddr = (unsigned long)vaddr; - return 0; + return vaddr; } -static void -clflush_write32(void *addr, uint32_t value) +static void *reloc_vaddr(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) { - /* This is not a fast path, so KISS. */ - drm_clflush_virt_range(addr, sizeof(uint32_t)); - *(uint32_t *)addr = value; - drm_clflush_virt_range(addr, sizeof(uint32_t)); + void *vaddr; + + if (cache->page == page) { + vaddr = unmask_page(cache->vaddr); + } else { + vaddr = NULL; + if ((cache->vaddr & KMAP) == 0) + vaddr = reloc_iomap(obj, cache, page); + if (!vaddr) + vaddr = reloc_kmap(obj, cache, page); + } + + return vaddr; } -static int -relocate_entry_clflush(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) { - struct drm_device *dev = obj->base.dev; - uint32_t page_offset = offset_in_page(reloc->offset); - uint64_t delta = relocation_target(reloc, target_offset); - char *vaddr; - int ret; + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { + if (flushes & CLFLUSH_BEFORE) { + clflushopt(addr); + mb(); + } - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; + *addr = value; + + /* Writes to the same cacheline are serialised by the CPU + * (including clflush). On the write path, we only require + * that it hits memory in an orderly fashion and place + * mb barriers at the start and end of the relocation phase + * to ensure ordering of clflush wrt to the system. + */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; +} - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - reloc->offset >> PAGE_SHIFT)); - clflush_write32(vaddr + page_offset, lower_32_bits(delta)); +static int +relocate_entry(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_relocation_entry *reloc, + struct reloc_cache *cache, + u64 target_offset) +{ + u64 offset = reloc->offset; + bool wide = cache->use_64bit_reloc; + void *vaddr; + + target_offset = relocation_target(reloc, target_offset); +repeat: + vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_offset), + cache->vaddr); + + if (wide) { + offset += sizeof(u32); + target_offset >>= 32; + wide = false; + goto repeat; + } - if (INTEL_INFO(dev)->gen >= 8) { - page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + return 0; +} - if (page_offset == 0) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); - } +static bool object_is_idle(struct drm_i915_gem_object *obj) +{ + unsigned long active = i915_gem_object_get_active(obj); + int idx; - clflush_write32(vaddr + page_offset, upper_32_bits(delta)); + for_each_active(active, idx) { + if (!i915_gem_active_is_idle(&obj->last_read[idx], + &obj->base.dev->struct_mutex)) + return false; } - kunmap_atomic(vaddr); - - return 0; + return true; } static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc) + struct drm_i915_gem_relocation_entry *reloc, + struct reloc_cache *cache) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; @@ -465,7 +633,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { + obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -482,26 +650,15 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, } /* We can't wait for rendering with pagefaults disabled */ - if (obj->active && pagefault_disabled()) + if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; - if (use_cpu_reloc(obj)) - ret = relocate_entry_cpu(obj, reloc, target_offset); - else if (obj->map_and_fenceable) - ret = relocate_entry_gtt(obj, reloc, target_offset); - else if (static_cpu_has(X86_FEATURE_CLFLUSH)) - ret = relocate_entry_clflush(obj, reloc, target_offset); - else { - WARN_ONCE(1, "Impossible case in relocation handling\n"); - ret = -ENODEV; - } - + ret = relocate_entry(obj, reloc, cache, target_offset); if (ret) return ret; /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; - return 0; } @@ -513,9 +670,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int remain, ret; + struct reloc_cache cache; + int remain, ret = 0; user_relocs = u64_to_user_ptr(entry->relocs_ptr); + reloc_cache_init(&cache, eb->i915); remain = entry->relocation_count; while (remain) { @@ -525,19 +684,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, count = ARRAY_SIZE(stack_reloc); remain -= count; - if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) - return -EFAULT; + if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) { + ret = -EFAULT; + goto out; + } do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); if (ret) - return ret; + goto out; if (r->presumed_offset != offset && - __put_user(r->presumed_offset, &user_relocs->presumed_offset)) { - return -EFAULT; + __put_user(r->presumed_offset, + &user_relocs->presumed_offset)) { + ret = -EFAULT; + goto out; } user_relocs++; @@ -545,7 +708,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, } while (--count); } - return 0; +out: + reloc_cache_fini(&cache); + return ret; #undef N_RELOC } @@ -555,15 +720,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int i, ret; + struct reloc_cache cache; + int i, ret = 0; + reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); if (ret) - return ret; + break; } + reloc_cache_fini(&cache); - return 0; + return ret; } static int @@ -626,23 +794,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_HIGH; } - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); - if ((ret == -ENOSPC || ret == -E2BIG) && + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags); + if ((ret == -ENOSPC || ret == -E2BIG) && only_mappable_for_reloc(entry->flags)) - ret = i915_gem_object_pin(obj, vma->vm, - entry->alignment, - flags & ~PIN_MAPPABLE); + ret = i915_vma_pin(vma, + entry->pad_to_size, + entry->alignment, + flags & ~PIN_MAPPABLE); if (ret) return ret; entry->flags |= __EXEC_OBJECT_HAS_PIN; if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - ret = i915_gem_object_get_fence(obj); + ret = i915_vma_get_fence(vma); if (ret) return ret; - if (i915_gem_object_pin_fence(obj)) + if (i915_vma_pin_fence(vma)) entry->flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -667,7 +839,7 @@ need_reloc_mappable(struct i915_vma *vma) if (entry->relocation_count == 0) return false; - if (!vma->is_ggtt) + if (!i915_vma_is_ggtt(vma)) return false; /* See also use_cpu_reloc() */ @@ -684,14 +856,17 @@ static bool eb_vma_misplaced(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct drm_i915_gem_object *obj = vma->obj; - WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_ggtt(vma)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; + if (vma->node.size < entry->pad_to_size) + return true; + if (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) return true; @@ -701,7 +876,8 @@ eb_vma_misplaced(struct i915_vma *vma) return true; /* avoid costly ping-pong once a batch bo ended up non-mappable */ - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) return !only_mappable_for_reloc(entry->flags); if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && @@ -725,8 +901,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4; int retry; - i915_gem_retire_requests_ring(engine); - vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; INIT_LIST_HEAD(&ordered_vmas); @@ -746,7 +920,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine, entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; + i915_gem_object_is_tiled(obj); need_mappable = need_fence || need_reloc_mappable(vma); if (entry->flags & EXEC_OBJECT_PINNED) @@ -843,7 +1017,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - drm_gem_object_unreference(&vma->obj->base); + i915_vma_put(vma); } mutex_unlock(&dev->struct_mutex); @@ -937,23 +1111,45 @@ err: return ret; } +static unsigned int eb_other_engines(struct drm_i915_gem_request *req) +{ + unsigned int mask; + + mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; + mask <<= I915_BO_ACTIVE_SHIFT; + + return mask; +} + static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned other_rings = ~intel_engine_flag(req->engine); + const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; int ret; list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + struct reservation_object *resv; - if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->engine, &req); + if (obj->flags & other_rings) { + ret = i915_gem_request_await_object + (req, obj, obj->base.pending_write_domain); if (ret) return ret; } + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) { + ret = i915_sw_fence_await_reservation + (&req->submit, resv, &i915_fence_ops, + obj->base.pending_write_domain, 10*HZ, + GFP_KERNEL | __GFP_NOWARN); + if (ret < 0) + return ret; + } + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj, false); } @@ -961,10 +1157,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, /* Unconditionally flush any chipset caches (for streaming writes). */ i915_gem_chipset_flush(req->engine->i915); - /* Unconditionally invalidate gpu caches and ensure that we do flush - * any residual writes from the previous batch. - */ - return intel_ring_invalidate_all_caches(req); + /* Unconditionally invalidate GPU caches and TLBs. */ + return req->engine->emit_flush(req, EMIT_INVALIDATE); } static bool @@ -1000,6 +1194,9 @@ validate_exec_list(struct drm_device *dev, unsigned invalid_flags; int i; + /* INTERNAL flags must not overlap with external ones */ + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; if (USES_FULL_PPGTT(dev)) invalid_flags |= EXEC_OBJECT_NEEDS_GTT; @@ -1029,6 +1226,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; + /* pad_to_size was once a reserved field, so sanitize it */ + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { + if (offset_in_page(exec[i].pad_to_size)) + return -EINVAL; + } else { + exec[i].pad_to_size = 0; + } + /* First check for malicious input causing overflow in * the worst case where we need to allocate the entire * relocation tree as a single array. @@ -1048,7 +1253,7 @@ validate_exec_list(struct drm_device *dev, return -EFAULT; if (likely(!i915.prefault_disable)) { - if (fault_in_multipages_readable(ptr, length)) + if (fault_in_pages_readable(ptr, length)) return -EFAULT; } } @@ -1060,12 +1265,9 @@ static struct i915_gem_context * i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *engine, const u32 ctx_id) { - struct i915_gem_context *ctx = NULL; + struct i915_gem_context *ctx; struct i915_ctx_hang_stats *hs; - if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) - return ERR_PTR(-EINVAL); - ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); if (IS_ERR(ctx)) return ctx; @@ -1079,66 +1281,99 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -void +void i915_vma_move_to_active(struct i915_vma *vma, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct drm_i915_gem_object *obj = vma->obj; + const unsigned int idx = req->engine->id; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + + obj->dirty = 1; /* be paranoid */ + + /* Add a reference if we're newly entering the active list. + * The order in which we add operations to the retirement queue is + * vital here: mark_active adds to the start of the callback list, + * such that subsequent callbacks are called first. Therefore we + * add the active reference first and queue for it to be dropped + * *last*. + */ + if (!i915_gem_object_is_active(obj)) + i915_gem_object_get(obj); + i915_gem_object_set_active(obj, idx); + i915_gem_active_set(&obj->last_read[idx], req); + + if (flags & EXEC_OBJECT_WRITE) { + i915_gem_active_set(&obj->last_write, req); + + intel_fb_obj_invalidate(obj, ORIGIN_CS); + + /* update for the implicit flush after a batch */ + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + } + + if (flags & EXEC_OBJECT_NEEDS_FENCE) + i915_gem_active_set(&vma->last_fence, req); + + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); + list_move_tail(&vma->vm_link, &vma->vm->active_list); +} + +static void eb_export_fence(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req, + unsigned int flags) +{ + struct reservation_object *resv; + + resv = i915_gem_object_get_dmabuf_resv(obj); + if (!resv) + return; + + /* Ignore errors from failing to allocate the new fence, we can't + * handle an error right now. Worst case should be missed + * synchronisation leading to rendering corruption. + */ + ww_mutex_lock(&resv->lock, NULL); + if (flags & EXEC_OBJECT_WRITE) + reservation_object_add_excl_fence(resv, &req->fence); + else if (reservation_object_reserve_shared(resv) == 0) + reservation_object_add_shared_fence(resv, &req->fence); + ww_mutex_unlock(&resv->lock); +} + +static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = i915_gem_request_get_engine(req); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; - obj->dirty = 1; /* be paranoid */ obj->base.write_domain = obj->base.pending_write_domain; - if (obj->base.write_domain == 0) + if (obj->base.write_domain) + vma->exec_entry->flags |= EXEC_OBJECT_WRITE; + else obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - i915_vma_move_to_active(vma, req); - if (obj->base.write_domain) { - i915_gem_request_assign(&obj->last_write_req, req); - - intel_fb_obj_invalidate(obj, ORIGIN_CS); - - /* update for the implicit flush after a batch */ - obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; - } - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_request_assign(&obj->last_fenced_req, req); - if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { - struct drm_i915_private *dev_priv = engine->i915; - list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, - &dev_priv->mm.fence_list); - } - } - + i915_vma_move_to_active(vma, req, vma->exec_entry->flags); + eb_export_fence(obj, req, vma->exec_entry->flags); trace_i915_gem_object_change_domain(obj, old_read, old_write); } } -static void -i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) -{ - /* Unconditionally force add_request to emit a full flush. */ - params->engine->gpu_caches_dirty = true; - - /* Add a breadcrumb for the completion of the batch buffer */ - __i915_add_request(params->request, params->batch_obj, true); -} - static int -i915_reset_gen7_sol_offsets(struct drm_device *dev, - struct drm_i915_gem_request *req) +i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_engine_cs *engine = req->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_ring *ring = req->ring; int ret, i; - if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) { + if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } @@ -1148,21 +1383,21 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return ret; for (i = 0; i < 4; i++) { - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(engine, 0); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); + intel_ring_emit(ring, 0); } - intel_ring_advance(engine); + intel_ring_advance(ring); return 0; } -static struct drm_i915_gem_object* +static struct i915_vma * i915_gem_execbuffer_parse(struct intel_engine_cs *engine, struct drm_i915_gem_exec_object2 *shadow_exec_entry, - struct eb_vmas *eb, struct drm_i915_gem_object *batch_obj, + struct eb_vmas *eb, u32 batch_start_offset, u32 batch_len, bool is_master) @@ -1174,51 +1409,44 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool, PAGE_ALIGN(batch_len)); if (IS_ERR(shadow_batch_obj)) - return shadow_batch_obj; - - ret = i915_parse_cmds(engine, - batch_obj, - shadow_batch_obj, - batch_start_offset, - batch_len, - is_master); - if (ret) - goto err; - - ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); - if (ret) - goto err; + return ERR_CAST(shadow_batch_obj); + + ret = intel_engine_cmd_parser(engine, + batch_obj, + shadow_batch_obj, + batch_start_offset, + batch_len, + is_master); + if (ret) { + if (ret == -EACCES) /* unhandled chained batch */ + vma = NULL; + else + vma = ERR_PTR(ret); + goto out; + } - i915_gem_object_unpin_pages(shadow_batch_obj); + vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + goto out; memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); - vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma->exec_entry = shadow_exec_entry; vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; - drm_gem_object_reference(&shadow_batch_obj->base); + i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); - shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; - - return shadow_batch_obj; - -err: +out: i915_gem_object_unpin_pages(shadow_batch_obj); - if (ret == -EACCES) /* unhandled chained batch */ - return batch_obj; - else - return ERR_PTR(ret); + return vma; } -int -i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas) +static int +execbuf_submit(struct i915_execbuffer_params *params, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas) { - struct drm_device *dev = params->dev; - struct intel_engine_cs *engine = params->engine; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; u32 instp_mask; @@ -1232,34 +1460,31 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (ret) return ret; - WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<id), - "%s didn't clear reload\n", engine->name); - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) { + if (instp_mode != 0 && params->engine->id != RCS) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev)->gen < 4) { + if (INTEL_INFO(dev_priv)->gen < 4) { DRM_DEBUG("no rel constants on pre-gen4\n"); return -EINVAL; } - if (INTEL_INFO(dev)->gen > 5 && + if (INTEL_INFO(dev_priv)->gen > 5 && instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); return -EINVAL; } /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev)->gen >= 6) + if (INTEL_INFO(dev_priv)->gen >= 6) instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; @@ -1268,37 +1493,39 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, return -EINVAL; } - if (engine == &dev_priv->engine[RCS] && + if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { + struct intel_ring *ring = params->request->ring; + ret = intel_ring_begin(params->request, 4); if (ret) return ret; - intel_ring_emit(engine, MI_NOOP); - intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(engine, INSTPM); - intel_ring_emit(engine, instp_mask << 16 | instp_mode); - intel_ring_advance(engine); + intel_ring_emit(ring, MI_NOOP); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit_reg(ring, INSTPM); + intel_ring_emit(ring, instp_mask << 16 | instp_mode); + intel_ring_advance(ring); dev_priv->relative_constants_mode = instp_mode; } if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(dev, params->request); + ret = i915_reset_gen7_sol_offsets(params->request); if (ret) return ret; } exec_len = args->batch_len; - exec_start = params->batch_obj_vm_offset + + exec_start = params->batch->node.start + params->args_batch_start_offset; if (exec_len == 0) - exec_len = params->batch_obj->base.size; + exec_len = params->batch->size - params->args_batch_start_offset; - ret = engine->dispatch_execbuffer(params->request, - exec_start, exec_len, - params->dispatch_flags); + ret = params->engine->emit_bb_start(params->request, + exec_start, exec_len, + params->dispatch_flags); if (ret) return ret; @@ -1311,43 +1538,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, /** * Find one BSD ring to dispatch the corresponding BSD command. - * The ring index is returned. + * The engine index is returned. */ static unsigned int -gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) +gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv, + struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; /* Check whether the file_priv has already selected one ring. */ - if ((int)file_priv->bsd_ring < 0) { - /* If not, use the ping-pong mechanism to select one. */ - mutex_lock(&dev_priv->drm.struct_mutex); - file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; - dev_priv->mm.bsd_ring_dispatch_index ^= 1; - mutex_unlock(&dev_priv->drm.struct_mutex); - } - - return file_priv->bsd_ring; -} - -static struct drm_i915_gem_object * -eb_get_batch(struct eb_vmas *eb) -{ - struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + if ((int)file_priv->bsd_engine < 0) + file_priv->bsd_engine = atomic_fetch_xor(1, + &dev_priv->mm.bsd_engine_dispatch_index); - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. - */ - if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; - - return vma->obj; + return file_priv->bsd_engine; } #define I915_USER_RINGS (4) @@ -1360,31 +1564,31 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { [I915_EXEC_VEBOX] = VECS }; -static int -eb_select_ring(struct drm_i915_private *dev_priv, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args, - struct intel_engine_cs **ring) +static struct intel_engine_cs * +eb_select_engine(struct drm_i915_private *dev_priv, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; + struct intel_engine_cs *engine; if (user_ring_id > I915_USER_RINGS) { DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } if ((user_ring_id != I915_EXEC_BSD) && ((args->flags & I915_EXEC_BSD_MASK) != 0)) { DRM_DEBUG("execbuf with non bsd ring but with invalid " "bsd dispatch flags: %d\n", (int)(args->flags)); - return -EINVAL; + return NULL; } if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; if (bsd_idx == I915_EXEC_BSD_DEFAULT) { - bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); + bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file); } else if (bsd_idx >= I915_EXEC_BSD_RING1 && bsd_idx <= I915_EXEC_BSD_RING2) { bsd_idx >>= I915_EXEC_BSD_SHIFT; @@ -1392,20 +1596,20 @@ eb_select_ring(struct drm_i915_private *dev_priv, } else { DRM_DEBUG("execbuf with unknown bsd ring: %u\n", bsd_idx); - return -EINVAL; + return NULL; } - *ring = &dev_priv->engine[_VCS(bsd_idx)]; + engine = &dev_priv->engine[_VCS(bsd_idx)]; } else { - *ring = &dev_priv->engine[user_ring_map[user_ring_id]]; + engine = &dev_priv->engine[user_ring_map[user_ring_id]]; } - if (!intel_engine_initialized(*ring)) { + if (!intel_engine_initialized(engine)) { DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); - return -EINVAL; + return NULL; } - return 0; + return engine; } static int @@ -1416,9 +1620,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_request *req = NULL; struct eb_vmas *eb; - struct drm_i915_gem_object *batch_obj; struct drm_i915_gem_exec_object2 shadow_exec_entry; struct intel_engine_cs *engine; struct i915_gem_context *ctx; @@ -1447,9 +1649,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) dispatch_flags |= I915_DISPATCH_PINNED; - ret = eb_select_ring(dev_priv, file, args, &engine); - if (ret) - return ret; + engine = eb_select_engine(dev_priv, file, args); + if (!engine) + return -EINVAL; if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); @@ -1489,7 +1691,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } - i915_gem_context_reference(ctx); + i915_gem_context_get(ctx); if (ctx->ppgtt) vm = &ctx->ppgtt->base; @@ -1498,9 +1700,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, memset(¶ms_master, 0x00, sizeof(params_master)); - eb = eb_create(args); + eb = eb_create(dev_priv, args); if (eb == NULL) { - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); mutex_unlock(&dev->struct_mutex); ret = -ENOMEM; goto pre_mutex_err; @@ -1512,7 +1714,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = eb_get_batch(eb); + params->batch = eb_get_batch(eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; @@ -1536,34 +1738,34 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } /* Set the pending read domains for the batch buffer to COMMAND */ - if (batch_obj->base.pending_write_domain) { + if (params->batch->obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } + if (args->batch_start_offset > params->batch->size || + args->batch_len > params->batch->size - args->batch_start_offset) { + DRM_DEBUG("Attempting to use out-of-bounds batch\n"); + ret = -EINVAL; + goto err; + } params->args_batch_start_offset = args->batch_start_offset; - if (i915_needs_cmd_parser(engine) && args->batch_len) { - struct drm_i915_gem_object *parsed_batch_obj; - - parsed_batch_obj = i915_gem_execbuffer_parse(engine, - &shadow_exec_entry, - eb, - batch_obj, - args->batch_start_offset, - args->batch_len, - drm_is_current_master(file)); - if (IS_ERR(parsed_batch_obj)) { - ret = PTR_ERR(parsed_batch_obj); + if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { + struct i915_vma *vma; + + vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry, + params->batch->obj, + eb, + args->batch_start_offset, + args->batch_len, + drm_is_current_master(file)); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; } - /* - * parsed_batch_obj == batch_obj means batch not fully parsed: - * Accept, but don't promote to secure. - */ - - if (parsed_batch_obj != batch_obj) { + if (vma) { /* * Batch parsed and accepted: * @@ -1575,16 +1777,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ dispatch_flags |= I915_DISPATCH_SECURE; params->args_batch_start_offset = 0; - batch_obj = parsed_batch_obj; + params->batch = vma; } } - batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; + params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ if (dispatch_flags & I915_DISPATCH_SECURE) { + struct drm_i915_gem_object *obj = params->batch->obj; + struct i915_vma *vma; + /* * So on first glance it looks freaky that we pin the batch here * outside of the reservation loop. But: @@ -1595,22 +1800,31 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * fitting due to fragmentation. * So this is actually safe. */ - ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); - if (ret) + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; + } - params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); - } else - params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); + params->batch = vma; + } /* Allocate a request for this batch buffer nice and early. */ - req = i915_gem_request_alloc(engine, ctx); - if (IS_ERR(req)) { - ret = PTR_ERR(req); + params->request = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(params->request)) { + ret = PTR_ERR(params->request); goto err_batch_unpin; } - ret = i915_gem_request_add_to_client(req, file); + /* Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when this + * request is retired will the the batch_obj be moved onto the + * inactive_list and lose its active reference. Hence we do not need + * to explicitly hold another reference here. + */ + params->request->batch = params->batch; + + ret = i915_gem_request_add_to_client(params->request, file); if (ret) goto err_request; @@ -1624,13 +1838,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->file = file; params->engine = engine; params->dispatch_flags = dispatch_flags; - params->batch_obj = batch_obj; params->ctx = ctx; - params->request = req; - ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); + ret = execbuf_submit(params, args, &eb->vmas); err_request: - i915_gem_execbuffer_retire_commands(params); + __i915_add_request(params->request, ret == 0); err_batch_unpin: /* @@ -1640,11 +1852,10 @@ err_batch_unpin: * active. */ if (dispatch_flags & I915_DISPATCH_SECURE) - i915_gem_object_ggtt_unpin(batch_obj); - + i915_vma_unpin(params->batch); err: /* the request owns the ref now */ - i915_gem_context_unreference(ctx); + i915_gem_context_put(ctx); eb_destroy(eb); mutex_unlock(&dev->struct_mutex);