drm/amd: add scheduler fence implementation (v2)
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 */
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "gpu_scheduler.h"

/* Initialize a given run queue struct */
static void init_rq(struct amd_run_queue *rq)
{
        INIT_LIST_HEAD(&rq->head.list);
        rq->head.belongto_rq = rq;
        mutex_init(&rq->lock);
        atomic_set(&rq->nr_entity, 0);
        rq->current_entity = &rq->head;
}

/* Note: the caller must hold the lock or be in an atomic context */
static void rq_remove_entity(struct amd_run_queue *rq,
                             struct amd_sched_entity *entity)
{
        if (rq->current_entity == entity)
                rq->current_entity = list_entry(entity->list.prev,
                                                typeof(*entity), list);
        list_del_init(&entity->list);
        atomic_dec(&rq->nr_entity);
}

static void rq_add_entity(struct amd_run_queue *rq,
                          struct amd_sched_entity *entity)
{
        list_add_tail(&entity->list, &rq->head.list);
        atomic_inc(&rq->nr_entity);
}

/**
 * Select the next entity from a specified run queue using a round robin
 * policy. This may return the same entity as the current one if it is the
 * only one available in the queue. Returns NULL if nothing is available.
 */
static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq)
{
        struct amd_sched_entity *p = rq->current_entity;
        int i = atomic_read(&rq->nr_entity) + 1; /* real count + dummy head */

        while (i) {
                p = list_entry(p->list.next, typeof(*p), list);
                if (!rq->check_entity_status(p)) {
                        rq->current_entity = p;
                        break;
                }
                i--;
        }
        return i ? p : NULL;
}

static bool context_entity_is_waiting(struct amd_sched_entity *entity)
{
        /* TODO: sync obj for multi-ring synchronization */
        return false;
}

static int gpu_entity_check_status(struct amd_sched_entity *entity)
{
        if (entity == &entity->belongto_rq->head)
                return -1;

        if (kfifo_is_empty(&entity->job_queue) ||
            context_entity_is_waiting(entity))
                return -1;

        return 0;
}

/**
 * Note: This function should only be called from the scheduler main
 * function for thread safety, there is no other protection here.
 * Returns true if the scheduler has room to submit something to the HW.
 *
 * For active_hw_rq, there is only one producer (the scheduler thread) and
 * one consumer (the ISR). It should be safe to use this function in the
 * scheduler main thread to decide whether to continue emitting more IBs.
 */
static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
{
        unsigned long flags;
        bool ready;

        spin_lock_irqsave(&sched->queue_lock, flags);
        ready = atomic64_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
        spin_unlock_irqrestore(&sched->queue_lock, flags);

        return ready;
}

/**
 * Select the next entity from the kernel run queue. If none is available,
 * return NULL.
 */
static struct amd_sched_entity *
kernel_rq_select_context(struct amd_gpu_scheduler *sched)
{
        struct amd_sched_entity *sched_entity;
        struct amd_run_queue *rq = &sched->kernel_rq;

        mutex_lock(&rq->lock);
        sched_entity = rq_select_entity(rq);
        mutex_unlock(&rq->lock);
        return sched_entity;
}

/**
 * Select the next entity containing real IB submissions.
 */
static struct amd_sched_entity *
select_context(struct amd_gpu_scheduler *sched)
{
        struct amd_sched_entity *wake_entity = NULL;
        struct amd_sched_entity *tmp;
        struct amd_run_queue *rq;

        if (!is_scheduler_ready(sched))
                return NULL;

        /* The kernel run queue has higher priority than the normal run queue */
        tmp = kernel_rq_select_context(sched);
        if (tmp != NULL)
                goto exit;

        rq = &sched->sched_rq;
        mutex_lock(&rq->lock);
        tmp = rq_select_entity(rq);
        mutex_unlock(&rq->lock);
exit:
        if (sched->current_entity && (sched->current_entity != tmp))
                wake_entity = sched->current_entity;
        sched->current_entity = tmp;
        if (wake_entity)
                wake_up(&wake_entity->wait_queue);
        return tmp;
}

/**
 * Init a context entity used by the scheduler when submitting to the HW ring.
 *
 * @sched       The pointer to the scheduler
 * @entity      The pointer to a valid amd_sched_entity
 * @rq          The run queue this entity belongs to
 * @jobs        The max number of jobs in the job queue
 *
 * Return 0 on success, negative error code on failure.
 */
int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
                          struct amd_sched_entity *entity,
                          struct amd_run_queue *rq,
                          uint32_t jobs)
{
        uint64_t seq_ring = 0;
        char name[20];

        if (!(sched && entity && rq))
                return -EINVAL;

        memset(entity, 0, sizeof(struct amd_sched_entity));
        seq_ring = ((uint64_t)sched->ring_id) << 60;
        spin_lock_init(&entity->lock);
        entity->belongto_rq = rq;
        entity->scheduler = sched;
        init_waitqueue_head(&entity->wait_queue);
        init_waitqueue_head(&entity->wait_emit);
        entity->fence_context = fence_context_alloc(1);
        snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
        memcpy(entity->name, name, 20);
        INIT_LIST_HEAD(&entity->fence_list);
        if (kfifo_alloc(&entity->job_queue,
                        jobs * sizeof(void *),
                        GFP_KERNEL))
                return -EINVAL;

        spin_lock_init(&entity->queue_lock);
        atomic64_set(&entity->last_emitted_v_seq, seq_ring);
        atomic64_set(&entity->last_queued_v_seq, seq_ring);
        atomic64_set(&entity->last_signaled_v_seq, seq_ring);

        /* Add the entity to the run queue */
        mutex_lock(&rq->lock);
        rq_add_entity(rq, entity);
        mutex_unlock(&rq->lock);
        return 0;
}

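/*
 * Illustrative sketch: how a driver might attach a per-context entity to
 * this scheduler's normal run queue.  The function name example_ctx_init
 * and the queue depth of 32 are hypothetical, not part of this file.
 */
#if 0
static int example_ctx_init(struct amd_gpu_scheduler *sched,
                            struct amd_sched_entity *entity)
{
        /* room for up to 32 queued jobs in this entity's job kfifo */
        return amd_sched_entity_init(sched, entity, &sched->sched_rq, 32);
}
#endif
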
/**
 * Query if entity is initialized
 *
 * @sched       Pointer to scheduler instance
 * @entity      The pointer to a valid scheduler entity
 *
 * Return true if entity is initialized, false otherwise.
 */
static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
                                          struct amd_sched_entity *entity)
{
        return entity->scheduler == sched &&
                entity->belongto_rq != NULL;
}

static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
                                   struct amd_sched_entity *entity)
{
        /*
         * Idle means no pending IBs, and the entity is not
         * currently being used.
         */
        barrier();
        if ((sched->current_entity != entity) &&
            kfifo_is_empty(&entity->job_queue))
                return true;

        return false;
}

/**
 * Destroy a context entity
 *
 * @sched       Pointer to scheduler instance
 * @entity      The pointer to a valid scheduler entity
 *
 * Return 0 on success, -1 if the entity could not be drained before the
 * timeout expired.
 */
int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
                          struct amd_sched_entity *entity)
{
        int r = 0;
        struct amd_run_queue *rq = entity->belongto_rq;

        if (!is_context_entity_initialized(sched, entity))
                return 0;

        /*
         * The client will not queue more IBs during this fini, consume
         * existing queued IBs.
         */
        r = wait_event_timeout(
                entity->wait_queue,
                is_context_entity_idle(sched, entity),
                msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
                ) ? 0 : -1;

        if (r) {
                if (entity->is_pending)
                        DRM_INFO("Entity %p is in waiting state during fini, "
                                 "all pending ibs will be canceled.\n",
                                 entity);
        }

        mutex_lock(&rq->lock);
        rq_remove_entity(rq, entity);
        mutex_unlock(&rq->lock);
        kfifo_free(&entity->job_queue);
        return r;
}

/**
 * Submit a normal job to the job queue
 *
 * @sched       The pointer to the scheduler
 * @c_entity    The pointer to amd_sched_entity
 * @data        The job data required to submit
 * @fence       Returns the scheduler fence created for this job
 *
 * Return 0 on success, negative error code on failure. If the entity's
 * job queue is full, this call blocks until a slot becomes available.
 */
int amd_sched_push_job(struct amd_gpu_scheduler *sched,
                       struct amd_sched_entity *c_entity,
                       void *data,
                       struct amd_sched_fence **fence)
{
        struct amd_sched_job *job;

        if (!fence)
                return -EINVAL;
        job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
        if (!job)
                return -ENOMEM;
        job->sched = sched;
        job->s_entity = c_entity;
        job->data = data;
        *fence = amd_sched_fence_create(c_entity);
        if ((*fence) == NULL) {
                kfree(job);
                return -EINVAL;
        }
        job->s_fence = *fence;
        while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
                                   &c_entity->queue_lock) != sizeof(void *)) {
                /*
                 * The current context used up all its IB slots;
                 * wait here, or we need to check whether the GPU is hung.
                 */
                schedule();
        }

        wake_up_interruptible(&sched->wait_queue);
        return 0;
}

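/*
 * Illustrative sketch: a minimal submission path built on
 * amd_sched_push_job().  The wrapper name example_submit and the ib_data
 * argument are hypothetical; the scheduler fence returned through *fence
 * signals once the job has been processed (see amd_sched_process_job below).
 */
#if 0
static int example_submit(struct amd_gpu_scheduler *sched,
                          struct amd_sched_entity *entity,
                          void *ib_data)
{
        struct amd_sched_fence *fence;
        int r;

        r = amd_sched_push_job(sched, entity, ib_data, &fence);
        if (r)
                return r;

        /* fence->base is a struct fence the caller can wait on or
         * attach callbacks to once the job reaches the HW ring */
        return 0;
}
#endif
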
/**
 * Wait for a virtual sequence number to be emitted.
 *
 * @c_entity    The pointer to a valid context entity
 * @seq         The virtual sequence number to wait for
 * @intr        Interruptible or not
 * @timeout     Timeout in ms, wait infinitely if < 0
 *
 * Return 0 if the sequence was emitted, < 0 on failure.
 */
int amd_sched_wait_emit(struct amd_sched_entity *c_entity,
                        uint64_t seq,
                        bool intr,
                        long timeout)
{
        atomic64_t *v_seq = &c_entity->last_emitted_v_seq;
        wait_queue_head_t *wait_queue = &c_entity->wait_emit;

        if (intr && (timeout < 0)) {
                wait_event_interruptible(
                        *wait_queue,
                        seq <= atomic64_read(v_seq));
                return 0;
        } else if (intr && (timeout >= 0)) {
                wait_event_interruptible_timeout(
                        *wait_queue,
                        seq <= atomic64_read(v_seq),
                        msecs_to_jiffies(timeout));
                return (seq <= atomic64_read(v_seq)) ?
                        0 : -1;
        } else if (!intr && (timeout < 0)) {
                wait_event(
                        *wait_queue,
                        seq <= atomic64_read(v_seq));
                return 0;
        } else if (!intr && (timeout >= 0)) {
                wait_event_timeout(
                        *wait_queue,
                        seq <= atomic64_read(v_seq),
                        msecs_to_jiffies(timeout));
                return (seq <= atomic64_read(v_seq)) ?
                        0 : -1;
        }
        return 0;
}

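/*
 * Illustrative sketch: pushing a job and then blocking until the scheduler
 * thread has actually emitted it to the HW ring.  The helper name
 * example_push_and_wait and the ib_data argument are hypothetical;
 * fence->v_seq is the virtual sequence number assigned to the job.
 */
#if 0
static int example_push_and_wait(struct amd_gpu_scheduler *sched,
                                 struct amd_sched_entity *entity,
                                 void *ib_data)
{
        struct amd_sched_fence *fence;
        int r;

        r = amd_sched_push_job(sched, entity, ib_data, &fence);
        if (r)
                return r;

        /* uninterruptible wait with no timeout (timeout < 0) */
        return amd_sched_wait_emit(entity, fence->v_seq, false, -1);
}
#endif
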
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
{
        struct amd_sched_job *sched_job =
                container_of(cb, struct amd_sched_job, cb);
        struct amd_gpu_scheduler *sched;
        unsigned long flags;

        sched = sched_job->sched;
        atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
                     sched_job->s_fence->v_seq);
        amd_sched_fence_signal(sched_job->s_fence);
        spin_lock_irqsave(&sched->queue_lock, flags);
        list_del(&sched_job->list);
        atomic64_dec(&sched->hw_rq_count);
        spin_unlock_irqrestore(&sched->queue_lock, flags);

        sched->ops->process_job(sched, sched_job);
        fence_put(&sched_job->s_fence->base);
        kfree(sched_job);
        wake_up_interruptible(&sched->wait_queue);
}

static int amd_sched_main(void *param)
{
        int r;
        struct amd_sched_job *job;
        struct sched_param sparam = {.sched_priority = 1};
        struct amd_sched_entity *c_entity = NULL;
        struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;

        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct fence *fence;

                wait_event_interruptible(sched->wait_queue,
                                         is_scheduler_ready(sched) &&
                                         (c_entity = select_context(sched)));
                r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
                if (r != sizeof(void *))
                        continue;
                r = sched->ops->prepare_job(sched, c_entity, job);
                if (!r) {
                        unsigned long flags;

                        spin_lock_irqsave(&sched->queue_lock, flags);
                        list_add_tail(&job->list, &sched->active_hw_rq);
                        atomic64_inc(&sched->hw_rq_count);
                        spin_unlock_irqrestore(&sched->queue_lock, flags);
                }
                mutex_lock(&sched->sched_lock);
                fence = sched->ops->run_job(sched, c_entity, job);
                if (fence) {
                        r = fence_add_callback(fence, &job->cb,
                                               amd_sched_process_job);
                        if (r == -ENOENT)
                                amd_sched_process_job(fence, &job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n", r);
                        fence_put(fence);
                }
                mutex_unlock(&sched->sched_lock);
        }
        return 0;
}

/**
 * Create a gpu scheduler
 *
 * @device        The device context for this scheduler
 * @ops           The backend operations for this scheduler.
 * @ring          The scheduler is per ring, this is the ring id.
 * @granularity   The minimum time unit in ms at which the scheduler schedules.
 * @preemption    Indicates whether this ring supports preemption, 0 means no.
 * @hw_submission The max number of jobs queued to the HW ring at the same time.
 *
 * Return the pointer to the scheduler on success, otherwise return NULL.
 */
struct amd_gpu_scheduler *amd_sched_create(void *device,
                                           struct amd_sched_backend_ops *ops,
                                           unsigned ring,
                                           unsigned granularity,
                                           unsigned preemption,
                                           unsigned hw_submission)
{
        struct amd_gpu_scheduler *sched;
        char name[20];

        sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
        if (!sched)
                return NULL;

        sched->device = device;
        sched->ops = ops;
        sched->granularity = granularity;
        sched->ring_id = ring;
        sched->preemption = preemption;
        sched->hw_submission_limit = hw_submission;
        snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
        mutex_init(&sched->sched_lock);
        spin_lock_init(&sched->queue_lock);
        init_rq(&sched->sched_rq);
        sched->sched_rq.check_entity_status = gpu_entity_check_status;

        init_rq(&sched->kernel_rq);
        sched->kernel_rq.check_entity_status = gpu_entity_check_status;

        init_waitqueue_head(&sched->wait_queue);
        INIT_LIST_HEAD(&sched->active_hw_rq);
        atomic64_set(&sched->hw_rq_count, 0);
        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_create(amd_sched_main, sched, name);
        if (IS_ERR(sched->thread)) {
                DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
                kfree(sched);
                return NULL;
        }

        wake_up_process(sched->thread);
        return sched;
}

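/*
 * Illustrative sketch: driver-side creation of one scheduler per ring.
 * The ops table members mirror the callbacks this file invokes
 * (prepare_job, run_job, process_job); the example_* names and the
 * hw_submission depth of 16 are hypothetical.
 */
#if 0
static struct amd_sched_backend_ops example_ops = {
        .prepare_job = example_prepare_job,
        .run_job     = example_run_job,
        .process_job = example_process_job,
};

static struct amd_gpu_scheduler *example_ring_sched_init(void *device,
                                                         unsigned ring_id)
{
        /* granularity 1 ms, no preemption, at most 16 jobs on the HW queue */
        return amd_sched_create(device, &example_ops, ring_id, 1, 0, 16);
}
#endif
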
/**
 * Destroy a gpu scheduler
 *
 * @sched       The pointer to the scheduler
 *
 * Return 0 on success.
 */
int amd_sched_destroy(struct amd_gpu_scheduler *sched)
{
        kthread_stop(sched->thread);
        kfree(sched);
        return 0;
}

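/*
 * Illustrative sketch: teardown order.  Entities are drained and removed
 * from their run queues first, then the scheduler thread is stopped.  The
 * helper name example_ring_sched_fini is hypothetical.
 */
#if 0
static void example_ring_sched_fini(struct amd_gpu_scheduler *sched,
                                    struct amd_sched_entity *entity)
{
        /* waits (with a timeout) for the entity's job queue to drain */
        amd_sched_entity_fini(sched, entity);
        /* then stop the kthread and free the scheduler */
        amd_sched_destroy(sched);
}
#endif
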
/**
 * Update the emitted sequence and wake up the waiters, called by run_job
 * on the driver side.
 *
 * @c_entity The context entity
 * @seq The sequence number for the latest emitted job
 */
void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq)
{
        atomic64_set(&c_entity->last_emitted_v_seq, seq);
        wake_up_all(&c_entity->wait_emit);
}

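/*
 * Illustrative sketch: a backend run_job callback that submits the IB to
 * the HW ring and reports the emitted virtual sequence number back to the
 * scheduler via amd_sched_emit().  example_submit_to_ring is a hypothetical
 * driver helper returning the HW fence for the submission.
 */
#if 0
static struct fence *example_run_job(struct amd_gpu_scheduler *sched,
                                     struct amd_sched_entity *entity,
                                     struct amd_sched_job *job)
{
        struct fence *hw_fence;

        hw_fence = example_submit_to_ring(job->data);

        /* wakes anyone blocked in amd_sched_wait_emit() on this entity */
        amd_sched_emit(entity, job->s_fence->v_seq);

        return hw_fence;
}
#endif
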
/**
 * Get the next queued sequence number
 *
 * @c_entity The context entity
 *
 * Return the next queued sequence number.
 */
uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
{
        return atomic64_read(&c_entity->last_queued_v_seq) + 1;
}