Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f7b49d5..668939a 100644
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_execbuf_util.h>
 
+#include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
 
 #include "amd_shared.h"
-#include "amdgpu_family.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_irq.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_gds.h"
 
+#include "gpu_scheduler.h"
+
 /*
  * Modules parameters.
  */
@@ -77,7 +79,11 @@ extern int amdgpu_bapm;
 extern int amdgpu_deep_color;
 extern int amdgpu_vm_size;
 extern int amdgpu_vm_block_size;
+extern int amdgpu_enable_scheduler;
+extern int amdgpu_sched_jobs;
+extern int amdgpu_sched_hw_submission;
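Note: a minimal sketch, assuming these parameters are wired up in amdgpu_drv.c
in the usual way (defaults and descriptions here are illustrative, not taken
from this diff):

	int amdgpu_enable_scheduler = 0;
	int amdgpu_sched_jobs = 16;
	int amdgpu_sched_hw_submission = 2;

	MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable, 0 = disable)");
	module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
	MODULE_PARM_DESC(sched_jobs, "max number of jobs queued in software");
	module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
	MODULE_PARM_DESC(sched_hw_submission, "max number of submissions in flight on the hardware ring");
	module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);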
 
+#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS         3000
 #define AMDGPU_MAX_USEC_TIMEOUT                        100000  /* 100 ms */
 #define AMDGPU_FENCE_JIFFIES_TIMEOUT           (HZ / 2)
 /* AMDGPU_IB_POOL_SIZE must be a power of 2 */
@@ -92,6 +98,9 @@ extern int amdgpu_vm_block_size;
 #define AMDGPU_MAX_COMPUTE_RINGS               8
 #define AMDGPU_MAX_VCE_RINGS                   2
 
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES              2
+
 /* number of hw syncs before falling back on blocking */
 #define AMDGPU_NUM_SYNCS                       4
 
@@ -177,7 +186,9 @@ struct amdgpu_vm;
 struct amdgpu_ring;
 struct amdgpu_semaphore;
 struct amdgpu_cs_parser;
+struct amdgpu_job;
 struct amdgpu_irq_src;
+struct amdgpu_fpriv;
 
 enum amdgpu_cp_irq {
        AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -239,7 +250,7 @@ struct amdgpu_buffer_funcs {
        unsigned        copy_num_dw;
 
        /* used for buffer migration */
-       void (*emit_copy_buffer)(struct amdgpu_ring *ring,
+       void (*emit_copy_buffer)(struct amdgpu_ib *ib,
                                 /* src addr in bytes */
                                 uint64_t src_offset,
                                 /* dst addr in bytes */
@@ -254,7 +265,7 @@ struct amdgpu_buffer_funcs {
        unsigned        fill_num_dw;
 
        /* used for buffer clearing */
-       void (*emit_fill_buffer)(struct amdgpu_ring *ring,
+       void (*emit_fill_buffer)(struct amdgpu_ib *ib,
                                 /* value to write to memory */
                                 uint32_t src_data,
                                 /* dst addr in bytes */
@@ -332,6 +343,8 @@ struct amdgpu_ring_funcs {
        int (*test_ring)(struct amdgpu_ring *ring);
        int (*test_ib)(struct amdgpu_ring *ring);
        bool (*is_lockup)(struct amdgpu_ring *ring);
+       /* insert NOP packets */
+       void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 };
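Note: a hedged sketch of what a generic fallback for the new insert_nop hook
could look like, assuming the ring stores its NOP packet in a ring->nop field
(not visible in this hunk); compare the amdgpu_ring_insert_nop() declaration
added further down:

	void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
	{
		int i;

		/* pad the ring with single NOP packets */
		for (i = 0; i < count; i++)
			amdgpu_ring_write(ring, ring->nop);
	}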
 
 /*
@@ -381,10 +394,10 @@ struct amdgpu_fence_driver {
        uint64_t                        sync_seq[AMDGPU_MAX_RINGS];
        atomic64_t                      last_seq;
        bool                            initialized;
-       bool                            delayed_irq;
        struct amdgpu_irq_src           *irq_src;
        unsigned                        irq_type;
        struct delayed_work             lockup_work;
+       wait_queue_head_t               fence_queue;
 };
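Note: fence_queue moves from amdgpu_device into the per-ring fence driver (the
matching removal is in the amdgpu_device hunk below), so waiters sleep per
ring; a hedged sketch of the intended wait pattern:

	/* sketch: block until this ring has processed our sequence number */
	wait_event(ring->fence_drv.fence_queue,
		   atomic64_read(&ring->fence_drv.last_seq) >= seq);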
 
 /* some special values for the owner field */
@@ -423,20 +436,20 @@ void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
                                   struct amdgpu_irq_src *irq_src,
                                   unsigned irq_type);
+void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
+void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
 int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
                      struct amdgpu_fence **fence);
-int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
-                         uint64_t seq, struct amdgpu_fence **fence);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-bool amdgpu_fence_signaled(struct amdgpu_fence *fence);
-int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible);
-int amdgpu_fence_wait_any(struct amdgpu_device *adev,
-                         struct amdgpu_fence **fences,
-                         bool intr);
+signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
+                                 struct fence **array,
+                                 uint32_t count,
+                                 bool intr,
+                                 signed long t);
 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
 void amdgpu_fence_unref(struct amdgpu_fence **fence);
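Note: the reworked amdgpu_fence_wait_any() takes a plain struct fence array
plus a jiffies timeout and, like fence_wait_timeout(), returns the remaining
time (0 on timeout, negative on error); a hedged caller sketch:

	signed long t;

	t = amdgpu_fence_wait_any(adev, fences, count, false,
				  MAX_SCHEDULE_TIMEOUT);
	if (t < 0)
		return t;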
 
@@ -481,7 +494,7 @@ static inline bool amdgpu_fence_is_earlier(struct amdgpu_fence *a,
        return a->seq < b->seq;
 }
 
-int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user, 
+int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
                           void *owner, struct amdgpu_fence **fence);
 
 /*
@@ -509,7 +522,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
                       uint64_t dst_offset,
                       uint32_t byte_count,
                       struct reservation_object *resv,
-                      struct amdgpu_fence **fence);
+                      struct fence **fence);
 int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
 
 struct amdgpu_bo_list_entry {
@@ -532,14 +545,16 @@ struct amdgpu_bo_va_mapping {
 struct amdgpu_bo_va {
        /* protected by bo being reserved */
        struct list_head                bo_list;
-       uint64_t                        addr;
-       struct amdgpu_fence             *last_pt_update;
+       struct fence                    *last_pt_update;
        unsigned                        ref_count;
 
-       /* protected by vm mutex */
-       struct list_head                mappings;
+       /* protected by vm mutex and spinlock */
        struct list_head                vm_status;
 
+       /* mappings for this bo_va */
+       struct list_head                invalids;
+       struct list_head                valids;
+
        /* constant after initialization */
        struct amdgpu_vm                *vm;
        struct amdgpu_bo                *bo;
@@ -643,7 +658,7 @@ struct amdgpu_sa_bo {
        struct amdgpu_sa_manager        *manager;
        unsigned                        soffset;
        unsigned                        eoffset;
-       struct amdgpu_fence             *fence;
+       struct fence                    *fence;
 };
 
 /*
@@ -685,7 +700,7 @@ bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
                                struct amdgpu_semaphore *semaphore);
 void amdgpu_semaphore_free(struct amdgpu_device *adev,
                           struct amdgpu_semaphore **semaphore,
-                          struct amdgpu_fence *fence);
+                          struct fence *fence);
 
 /*
  * Synchronization
@@ -693,20 +708,23 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
 struct amdgpu_sync {
        struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
        struct amdgpu_fence     *sync_to[AMDGPU_MAX_RINGS];
-       struct amdgpu_fence     *last_vm_update;
+       DECLARE_HASHTABLE(fences, 4);
+       struct fence            *last_vm_update;
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
-void amdgpu_sync_fence(struct amdgpu_sync *sync,
-                      struct amdgpu_fence *fence);
+int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+                     struct fence *f);
 int amdgpu_sync_resv(struct amdgpu_device *adev,
                     struct amdgpu_sync *sync,
                     struct reservation_object *resv,
                     void *owner);
 int amdgpu_sync_rings(struct amdgpu_sync *sync,
                      struct amdgpu_ring *ring);
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
+int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
-                     struct amdgpu_fence *fence);
+                     struct fence *fence);
 
 /*
  * GART structures, functions & helpers
@@ -821,7 +839,9 @@ struct amdgpu_flip_work {
        uint64_t                        base;
        struct drm_pending_vblank_event *event;
        struct amdgpu_bo                *old_rbo;
-       struct fence                    *fence;
+       struct fence                    *excl;
+       unsigned                        shared_count;
+       struct fence                    **shared;
 };
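Note: splitting the single fence into one exclusive plus N shared fences
matches what reservation_object_get_fences_rcu() returns; a hedged sketch of
how a flip presumably collects them (rbo standing in for the buffer being
flipped):

	r = reservation_object_get_fences_rcu(rbo->tbo.resv, &work->excl,
					      &work->shared_count,
					      &work->shared);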
 
 
@@ -844,6 +864,8 @@ struct amdgpu_ib {
        uint32_t                        gws_base, gws_size;
        uint32_t                        oa_base, oa_size;
        uint32_t                        flags;
+       /* resulting sequence number */
+       uint64_t                        sequence;
 };
 
 enum amdgpu_ring_type {
@@ -854,11 +876,23 @@ enum amdgpu_ring_type {
        AMDGPU_RING_TYPE_VCE
 };
 
+extern struct amd_sched_backend_ops amdgpu_sched_ops;
+
+int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
+                                        struct amdgpu_ring *ring,
+                                        struct amdgpu_ib *ibs,
+                                        unsigned num_ibs,
+                                        int (*free_job)(struct amdgpu_job *),
+                                        void *owner,
+                                        struct fence **fence);
+
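Note: a hedged usage sketch for the new kernel-submission helper, reusing the
amdgpu_vm_free_job() callback declared near the end of this diff; whether the
scheduler path is taken presumably hinges on the amdgpu_enable_scheduler
parameter added above:

	if (amdgpu_enable_scheduler) {
		r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
							 &amdgpu_vm_free_job,
							 AMDGPU_FENCE_OWNER_VM,
							 &fence);
		if (r)
			goto error_free;
	}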
 struct amdgpu_ring {
        struct amdgpu_device            *adev;
        const struct amdgpu_ring_funcs  *funcs;
        struct amdgpu_fence_driver      fence_drv;
+       struct amd_gpu_scheduler        *scheduler;
 
+       spinlock_t              fence_lock;
        struct mutex            *ring_lock;
        struct amdgpu_bo        *ring_obj;
        volatile uint32_t       *ring;
@@ -892,6 +926,7 @@ struct amdgpu_ring {
        struct amdgpu_ctx       *current_ctx;
        enum amdgpu_ring_type   type;
        char                    name[16];
+       bool                    is_pte_ring;
 };
 
 /*
@@ -933,7 +968,7 @@ struct amdgpu_vm_id {
        unsigned                id;
        uint64_t                pd_gpu_addr;
        /* last flushed PD/PT update */
-       struct amdgpu_fence     *flushed_updates;
+       struct fence            *flushed_updates;
        /* last use of vmid */
        struct amdgpu_fence     *last_id_use;
 };
@@ -943,18 +978,22 @@ struct amdgpu_vm {
 
        struct rb_root          va;
 
-       /* protecting invalidated and freed */
+       /* protecting invalidated */
        spinlock_t              status_lock;
 
        /* BOs moved, but not yet updated in the PT */
        struct list_head        invalidated;
 
-       /* BOs freed, but not yet updated in the PT */
+       /* BOs cleared in the PT because of a move */
+       struct list_head        cleared;
+
+       /* BO mappings freed, but not yet updated in the PT */
        struct list_head        freed;
 
        /* contains the page directory */
        struct amdgpu_bo        *page_directory;
        unsigned                max_pde_used;
+       struct fence            *page_directory_fence;
 
        /* array of page tables, one for each page directory entry */
        struct amdgpu_vm_pt     *page_tables;
@@ -983,27 +1022,47 @@ struct amdgpu_vm_manager {
  * context related structures
  */
 
-struct amdgpu_ctx_state {
-       uint64_t flags;
-       uint32_t hangs;
+#define AMDGPU_CTX_MAX_CS_PENDING      16
+
+struct amdgpu_ctx_ring {
+       uint64_t                sequence;
+       struct fence            *fences[AMDGPU_CTX_MAX_CS_PENDING];
+       struct amd_sched_entity entity;
 };
 
 struct amdgpu_ctx {
-       /* call kref_get()before CS start and kref_put() after CS fence signaled */
-       struct kref refcount;
-       struct amdgpu_fpriv *fpriv;
-       struct amdgpu_ctx_state state;
-       uint32_t id;
-       unsigned reset_counter;
+       struct kref             refcount;
+       struct amdgpu_device    *adev;
+       unsigned                reset_counter;
+       spinlock_t              ring_lock;
+       struct amdgpu_ctx_ring  rings[AMDGPU_MAX_RINGS];
 };
 
 struct amdgpu_ctx_mgr {
-       struct amdgpu_device *adev;
-       struct idr ctx_handles;
-       /* lock for IDR system */
-       struct mutex lock;
+       struct amdgpu_device    *adev;
+       struct mutex            lock;
+       /* protected by lock */
+       struct idr              ctx_handles;
 };
 
+int amdgpu_ctx_init(struct amdgpu_device *adev, bool kernel,
+                   struct amdgpu_ctx *ctx);
+void amdgpu_ctx_fini(struct amdgpu_ctx *ctx);
+
+struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
+int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+                             struct fence *fence);
+struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+                                  struct amdgpu_ring *ring, uint64_t seq);
+
+int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
+                    struct drm_file *filp);
+
+void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
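Note: a hedged sketch of the bookkeeping behind amdgpu_ctx_add_fence(),
assuming a ring->idx field; each context ring keeps its last
AMDGPU_CTX_MAX_CS_PENDING fences in a circular array indexed by sequence
number (error handling elided):

	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
	struct fence *other = cring->fences[idx];

	if (other)
		fence_wait(other, false);	/* slot still pending: wait it out */

	fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[idx] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	fence_put(other);
	return seq;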
+
 /*
  * file private structure
  */
@@ -1012,7 +1071,7 @@ struct amdgpu_fpriv {
        struct amdgpu_vm        vm;
        struct mutex            bo_list_lock;
        struct idr              bo_list_handles;
-       struct amdgpu_ctx_mgr ctx_mgr;
+       struct amdgpu_ctx_mgr   ctx_mgr;
 };
 
 /*
@@ -1160,6 +1219,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 void amdgpu_ring_free_size(struct amdgpu_ring *ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
@@ -1207,6 +1267,16 @@ struct amdgpu_cs_parser {
        struct amdgpu_user_fence uf;
 };
 
+struct amdgpu_job {
+       struct amd_sched_job    base;
+       struct amdgpu_device    *adev;
+       struct amdgpu_ib        *ibs;
+       uint32_t                num_ibs;
+       struct mutex            job_lock;
+       struct amdgpu_user_fence uf;
+       int (*free_job)(struct amdgpu_job *sched_job);
+};
+
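Note: a hedged sketch of a free_job hook matching the new callback;
amdgpu_vm_free_job(), declared at the end of this diff, presumably follows
this shape:

	static int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
	{
		int i;

		/* release the IBs carried by the job, then the array itself */
		for (i = 0; i < sched_job->num_ibs; i++)
			amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
		kfree(sched_job->ibs);
		return 0;
	}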
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
 {
        return p->ibs[ib_idx].ptr[idx];
@@ -1601,7 +1671,6 @@ struct amdgpu_uvd {
        struct amdgpu_bo        *vcpu_bo;
        void                    *cpu_addr;
        uint64_t                gpu_addr;
-       void                    *saved_bo;
        atomic_t                handles[AMDGPU_MAX_UVD_HANDLES];
        struct drm_file         *filp[AMDGPU_MAX_UVD_HANDLES];
        struct delayed_work     idle_work;
@@ -1645,6 +1714,7 @@ struct amdgpu_sdma {
        uint32_t                feature_version;
 
        struct amdgpu_ring      ring;
+       bool                    burst_nop;
 };
 
 /*
@@ -1849,17 +1919,12 @@ struct amdgpu_atcs {
        struct amdgpu_atcs_functions functions;
 };
 
-int amdgpu_ctx_alloc(struct amdgpu_device *adev,struct amdgpu_fpriv *fpriv,
-                                                       uint32_t *id,uint32_t flags);
-int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
-                                                 uint32_t id);
-
-void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
-struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
-int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+/*
+ * CGS
+ */
+void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(void *cgs_device);
 
-extern int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
-                                                struct drm_file *filp);
 
 /*
  * Core structure, functions and helpers.
@@ -1883,7 +1948,7 @@ struct amdgpu_device {
        struct rw_semaphore             exclusive_lock;
 
        /* ASIC */
-       enum amdgpu_asic_type           asic_type;
+       enum amd_asic_type              asic_type;
        uint32_t                        family;
        uint32_t                        rev_id;
        uint32_t                        external_rev_id;
@@ -1976,7 +2041,6 @@ struct amdgpu_device {
        struct amdgpu_irq_src           hpd_irq;
 
        /* rings */
-       wait_queue_head_t               fence_queue;
        unsigned                        fence_context;
        struct mutex                    ring_lock;
        unsigned                        num_rings;
@@ -1999,7 +2063,7 @@ struct amdgpu_device {
        struct amdgpu_gfx               gfx;
 
        /* sdma */
-       struct amdgpu_sdma              sdma[2];
+       struct amdgpu_sdma              sdma[AMDGPU_MAX_SDMA_INSTANCES];
        struct amdgpu_irq_src           sdma_trap_irq;
        struct amdgpu_irq_src           sdma_illegal_inst_irq;
 
@@ -2025,6 +2089,12 @@ struct amdgpu_device {
        /* tracking pinned memory */
        u64 vram_pin_size;
        u64 gart_pin_size;
+
+       /* amdkfd interface */
+       struct kfd_dev          *kfd;
+
+       /* kernel context for IB submission */
+       struct amdgpu_ctx       kernel_ctx;
 };
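Note: the new kernel_ctx is presumably set up during device init via the
reworked context API above; a hedged sketch:

	r = amdgpu_ctx_init(adev, true, &adev->kernel_ctx);
	if (r) {
		dev_err(adev->dev, "failed to create kernel context (%d)\n", r);
		return r;
	}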
 
 bool amdgpu_device_is_px(struct drm_device *dev);
@@ -2132,6 +2202,21 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
        ring->ring_free_dw--;
 }
 
+static inline struct amdgpu_sdma *amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+       int i;
+
+       for (i = 0; i < AMDGPU_MAX_SDMA_INSTANCES; i++)
+               if (&adev->sdma[i].ring == ring)
+                       return &adev->sdma[i];
+
+       return NULL;
+}
+
 /*
  * ASICs macro.
  */
@@ -2183,8 +2268,8 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
-#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b))
-#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
+#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
 #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
 #define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev))
@@ -2212,6 +2297,12 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_card_posted(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
+struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
+                                                struct drm_file *filp,
+                                                struct amdgpu_ctx *ctx,
+                                                struct amdgpu_ib *ibs,
+                                                uint32_t num_ibs);
+
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
                       u32 ip_instance, u32 ring,
@@ -2275,11 +2366,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
                                          struct amdgpu_vm *vm,
                                          struct list_head *head);
-struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
-                                      struct amdgpu_vm *vm);
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+                     struct amdgpu_sync *sync);
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
                     struct amdgpu_vm *vm,
-                    struct amdgpu_fence *updates);
+                    struct fence *updates);
 void amdgpu_vm_fence(struct amdgpu_device *adev,
                     struct amdgpu_vm *vm,
                     struct amdgpu_fence *fence);
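Note: amdgpu_vm_grab_id() now feeds its dependencies into an amdgpu_sync
instead of returning a fence, and amdgpu_vm_flush() takes a plain struct
fence; a hedged caller sketch using the signatures above:

	r = amdgpu_vm_grab_id(vm, ring, &sync);
	if (r)
		return r;

	amdgpu_vm_flush(ring, vm, sync.last_vm_update);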
@@ -2309,7 +2400,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
                       uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
                      struct amdgpu_bo_va *bo_va);
-
+int amdgpu_vm_free_job(struct amdgpu_job *job);
 /*
  * functions used by amdgpu_encoder.c
  */