drm/amd: add scheduler fence implementation (v2)
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 */
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "gpu_scheduler.h"

/* Initialize a given run queue struct */
static void init_rq(struct amd_run_queue *rq)
{
        INIT_LIST_HEAD(&rq->head.list);
        rq->head.belongto_rq = rq;
        mutex_init(&rq->lock);
        atomic_set(&rq->nr_entity, 0);
        rq->current_entity = &rq->head;
}

/* Note: the caller must hold the lock or be in an atomic context */
static void rq_remove_entity(struct amd_run_queue *rq,
                             struct amd_sched_entity *entity)
{
        if (rq->current_entity == entity)
                rq->current_entity = list_entry(entity->list.prev,
                                                typeof(*entity), list);
        list_del_init(&entity->list);
        atomic_dec(&rq->nr_entity);
}

static void rq_add_entity(struct amd_run_queue *rq,
                          struct amd_sched_entity *entity)
{
        list_add_tail(&entity->list, &rq->head.list);
        atomic_inc(&rq->nr_entity);
}

/**
 * Select the next entity from a specified run queue using a round robin
 * policy. This may return the same entity as the current one if it is the
 * only one available in the queue. Returns NULL if nothing is available.
 */
static struct amd_sched_entity *rq_select_entity(struct amd_run_queue *rq)
{
        struct amd_sched_entity *p = rq->current_entity;
        int i = atomic_read(&rq->nr_entity) + 1; /* real count + dummy head */

        while (i) {
                p = list_entry(p->list.next, typeof(*p), list);
                if (!rq->check_entity_status(p)) {
                        rq->current_entity = p;
                        break;
                }
                i--;
        }
        return i ? p : NULL;
}

static bool context_entity_is_waiting(struct amd_sched_entity *entity)
{
        /* TODO: sync obj for multi-ring synchronization */
        return false;
}

static int gpu_entity_check_status(struct amd_sched_entity *entity)
{
        if (entity == &entity->belongto_rq->head)
                return -1;

        if (kfifo_is_empty(&entity->job_queue) ||
            context_entity_is_waiting(entity))
                return -1;

        return 0;
}

/**
 * Note: This function should only be called from the scheduler main
 * function for thread safety, there is no other protection here.
 * Returns true if the scheduler has room to submit something to the HW.
 *
 * For active_hw_rq, there is only one producer (the scheduler thread) and
 * one consumer (the ISR). It should be safe to use this function in the
 * scheduler main thread to decide whether to continue emitting more IBs.
 */
static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
{
        unsigned long flags;
        bool ready;

        spin_lock_irqsave(&sched->queue_lock, flags);
        ready = atomic64_read(&sched->hw_rq_count) <
                sched->hw_submission_limit;
        spin_unlock_irqrestore(&sched->queue_lock, flags);

        return ready;
}

/**
 * Select the next entity from the kernel run queue. If none is available,
 * return NULL.
 */
static struct amd_sched_entity *
kernel_rq_select_context(struct amd_gpu_scheduler *sched)
{
        struct amd_sched_entity *sched_entity;
        struct amd_run_queue *rq = &sched->kernel_rq;

        mutex_lock(&rq->lock);
        sched_entity = rq_select_entity(rq);
        mutex_unlock(&rq->lock);
        return sched_entity;
}

/**
 * Select the next entity containing real IB submissions.
 */
static struct amd_sched_entity *
select_context(struct amd_gpu_scheduler *sched)
{
        struct amd_sched_entity *wake_entity = NULL;
        struct amd_sched_entity *tmp;
        struct amd_run_queue *rq;

        if (!is_scheduler_ready(sched))
                return NULL;

        /* The kernel run queue has higher priority than the normal run queue */
        tmp = kernel_rq_select_context(sched);
        if (tmp != NULL)
                goto exit;

        rq = &sched->sched_rq;
        mutex_lock(&rq->lock);
        tmp = rq_select_entity(rq);
        mutex_unlock(&rq->lock);
exit:
        if (sched->current_entity && (sched->current_entity != tmp))
                wake_entity = sched->current_entity;
        sched->current_entity = tmp;
        if (wake_entity)
                wake_up(&wake_entity->wait_queue);
        return tmp;
}

/**
 * Init a context entity used by the scheduler when submitting to the HW ring.
 *
 * @sched       The pointer to the scheduler
 * @entity      The pointer to a valid amd_sched_entity
 * @rq          The run queue this entity belongs to
 * @jobs        The max number of jobs in the job queue
 *
 * Return 0 on success, negative error code on failure.
 */
int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
                          struct amd_sched_entity *entity,
                          struct amd_run_queue *rq,
                          uint32_t jobs)
{
        uint64_t seq_ring = 0;
        char name[20];

        if (!(sched && entity && rq))
                return -EINVAL;

        memset(entity, 0, sizeof(struct amd_sched_entity));
        seq_ring = ((uint64_t)sched->ring_id) << 60;
        spin_lock_init(&entity->lock);
        entity->belongto_rq = rq;
        entity->scheduler = sched;
        init_waitqueue_head(&entity->wait_queue);
        init_waitqueue_head(&entity->wait_emit);
        entity->fence_context = fence_context_alloc(1);
        snprintf(name, sizeof(name), "c_entity[%llu]", entity->fence_context);
        memcpy(entity->name, name, 20);
        INIT_LIST_HEAD(&entity->fence_list);
        if (kfifo_alloc(&entity->job_queue,
                        jobs * sizeof(void *),
                        GFP_KERNEL))
                return -EINVAL;

        spin_lock_init(&entity->queue_lock);
        atomic64_set(&entity->last_emitted_v_seq, seq_ring);
        atomic64_set(&entity->last_queued_v_seq, seq_ring);
        atomic64_set(&entity->last_signaled_v_seq, seq_ring);

        /* Add the entity to the run queue */
        mutex_lock(&rq->lock);
        rq_add_entity(rq, entity);
        mutex_unlock(&rq->lock);
        return 0;
}

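/*
 * Illustrative sketch: how a driver might attach a per-context entity to
 * this scheduler's normal run queue.  The function name example_ctx_init
 * and the queue depth of 32 are hypothetical, not part of this file.
 */
#if 0
static int example_ctx_init(struct amd_gpu_scheduler *sched,
                            struct amd_sched_entity *entity)
{
        /* room for up to 32 queued jobs in this entity's job kfifo */
        return amd_sched_entity_init(sched, entity, &sched->sched_rq, 32);
}
#endif
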
/**
 * Query if entity is initialized
 *
 * @sched       Pointer to scheduler instance
 * @entity      The pointer to a valid scheduler entity
 *
 * Return true if entity is initialized, false otherwise.
 */
static bool is_context_entity_initialized(struct amd_gpu_scheduler *sched,
                                          struct amd_sched_entity *entity)
{
        return entity->scheduler == sched &&
                entity->belongto_rq != NULL;
}

static bool is_context_entity_idle(struct amd_gpu_scheduler *sched,
                                   struct amd_sched_entity *entity)
{
        /*
         * Idle means no pending IBs, and the entity is not
         * currently being used.
         */
        barrier();
        if ((sched->current_entity != entity) &&
            kfifo_is_empty(&entity->job_queue))
                return true;

        return false;
}

/**
 * Destroy a context entity
 *
 * @sched       Pointer to scheduler instance
 * @entity      The pointer to a valid scheduler entity
 *
 * Return 0 on success, -1 if the entity could not be drained before the
 * timeout expired.
 */
int amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
                          struct amd_sched_entity *entity)
{
        int r = 0;
        struct amd_run_queue *rq = entity->belongto_rq;

        if (!is_context_entity_initialized(sched, entity))
                return 0;

        /*
         * The client will not queue more IBs during this fini, consume
         * existing queued IBs.
         */
        r = wait_event_timeout(
                entity->wait_queue,
                is_context_entity_idle(sched, entity),
                msecs_to_jiffies(AMD_GPU_WAIT_IDLE_TIMEOUT_IN_MS)
                ) ? 0 : -1;

        if (r) {
                if (entity->is_pending)
                        DRM_INFO("Entity %p is in waiting state during fini, "
                                 "all pending ibs will be canceled.\n",
                                 entity);
        }

        mutex_lock(&rq->lock);
        rq_remove_entity(rq, entity);
        mutex_unlock(&rq->lock);
        kfifo_free(&entity->job_queue);
        return r;
}

/**
 * Submit a normal job to the job queue
 *
 * @sched       The pointer to the scheduler
 * @c_entity    The pointer to amd_sched_entity
 * @data        The job data required to submit
 * @fence       Returns the scheduler fence created for this job
 *
 * Return 0 on success, negative error code on failure. If the entity's
 * job queue is full, this call blocks until a slot becomes available.
 */
int amd_sched_push_job(struct amd_gpu_scheduler *sched,
                       struct amd_sched_entity *c_entity,
                       void *data,
                       struct amd_sched_fence **fence)
{
        struct amd_sched_job *job;

        if (!fence)
                return -EINVAL;
        job = kzalloc(sizeof(struct amd_sched_job), GFP_KERNEL);
        if (!job)
                return -ENOMEM;
        job->sched = sched;
        job->s_entity = c_entity;
        job->data = data;
        *fence = amd_sched_fence_create(c_entity);
        if ((*fence) == NULL) {
                kfree(job);
                return -EINVAL;
        }
        job->s_fence = *fence;
        while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *),
                                   &c_entity->queue_lock) != sizeof(void *)) {
                /*
                 * The current context used up all its IB slots;
                 * wait here, or we need to check whether the GPU is hung.
                 */
                schedule();
        }

        wake_up_interruptible(&sched->wait_queue);
        return 0;
}

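/*
 * Illustrative sketch: a minimal submission path built on
 * amd_sched_push_job().  The wrapper name example_submit and the ib_data
 * argument are hypothetical; the scheduler fence returned through *fence
 * signals once the job has been processed (see amd_sched_process_job below).
 */
#if 0
static int example_submit(struct amd_gpu_scheduler *sched,
                          struct amd_sched_entity *entity,
                          void *ib_data)
{
        struct amd_sched_fence *fence;
        int r;

        r = amd_sched_push_job(sched, entity, ib_data, &fence);
        if (r)
                return r;

        /* fence->base is a struct fence the caller can wait on or
         * attach callbacks to once the job reaches the HW ring */
        return 0;
}
#endif
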
/**
 * Wait for a virtual sequence number to be emitted.
 *
 * @c_entity    The pointer to a valid context entity
 * @seq         The virtual sequence number to wait for
 * @intr        Interruptible or not
 * @timeout     Timeout in ms, wait infinitely if < 0
 *
 * Return 0 if the sequence was emitted, < 0 on failure.
 */
int amd_sched_wait_emit(struct amd_sched_entity *c_entity,
                        uint64_t seq,
                        bool intr,
                        long timeout)
{
        atomic64_t *v_seq = &c_entity->last_emitted_v_seq;
        wait_queue_head_t *wait_queue = &c_entity->wait_emit;

        if (intr && (timeout < 0)) {
                wait_event_interruptible(
                        *wait_queue,
                        seq <= atomic64_read(v_seq));
                return 0;
        } else if (intr && (timeout >= 0)) {
                wait_event_interruptible_timeout(
                        *wait_queue,
                        seq <= atomic64_read(v_seq),
                        msecs_to_jiffies(timeout));
                return (seq <= atomic64_read(v_seq)) ?
                        0 : -1;
        } else if (!intr && (timeout < 0)) {
                wait_event(
                        *wait_queue,
                        seq <= atomic64_read(v_seq));
                return 0;
        } else if (!intr && (timeout >= 0)) {
                wait_event_timeout(
                        *wait_queue,
                        seq <= atomic64_read(v_seq),
                        msecs_to_jiffies(timeout));
                return (seq <= atomic64_read(v_seq)) ?
                        0 : -1;
        }
        return 0;
}

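/*
 * Illustrative sketch: pushing a job and then blocking until the scheduler
 * thread has actually emitted it to the HW ring.  The helper name
 * example_push_and_wait and the ib_data argument are hypothetical;
 * fence->v_seq is the virtual sequence number assigned to the job.
 */
#if 0
static int example_push_and_wait(struct amd_gpu_scheduler *sched,
                                 struct amd_sched_entity *entity,
                                 void *ib_data)
{
        struct amd_sched_fence *fence;
        int r;

        r = amd_sched_push_job(sched, entity, ib_data, &fence);
        if (r)
                return r;

        /* uninterruptible wait with no timeout (timeout < 0) */
        return amd_sched_wait_emit(entity, fence->v_seq, false, -1);
}
#endif
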
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
{
        struct amd_sched_job *sched_job =
                container_of(cb, struct amd_sched_job, cb);
        struct amd_gpu_scheduler *sched;
        unsigned long flags;

        sched = sched_job->sched;
        atomic64_set(&sched_job->s_entity->last_signaled_v_seq,
                     sched_job->s_fence->v_seq);
        amd_sched_fence_signal(sched_job->s_fence);
        spin_lock_irqsave(&sched->queue_lock, flags);
        list_del(&sched_job->list);
        atomic64_dec(&sched->hw_rq_count);
        spin_unlock_irqrestore(&sched->queue_lock, flags);

        sched->ops->process_job(sched, sched_job);
        fence_put(&sched_job->s_fence->base);
        kfree(sched_job);
        wake_up_interruptible(&sched->wait_queue);
}

static int amd_sched_main(void *param)
{
        int r;
        struct amd_sched_job *job;
        struct sched_param sparam = {.sched_priority = 1};
        struct amd_sched_entity *c_entity = NULL;
        struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;

        sched_setscheduler(current, SCHED_FIFO, &sparam);

        while (!kthread_should_stop()) {
                struct fence *fence;

                wait_event_interruptible(sched->wait_queue,
                                         is_scheduler_ready(sched) &&
                                         (c_entity = select_context(sched)));
                r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
                if (r != sizeof(void *))
                        continue;
                r = sched->ops->prepare_job(sched, c_entity, job);
                if (!r) {
                        unsigned long flags;

                        spin_lock_irqsave(&sched->queue_lock, flags);
                        list_add_tail(&job->list, &sched->active_hw_rq);
                        atomic64_inc(&sched->hw_rq_count);
                        spin_unlock_irqrestore(&sched->queue_lock, flags);
                }
                mutex_lock(&sched->sched_lock);
                fence = sched->ops->run_job(sched, c_entity, job);
                if (fence) {
                        r = fence_add_callback(fence, &job->cb,
                                               amd_sched_process_job);
                        if (r == -ENOENT)
                                amd_sched_process_job(fence, &job->cb);
                        else if (r)
                                DRM_ERROR("fence add callback failed (%d)\n", r);
                        fence_put(fence);
                }
                mutex_unlock(&sched->sched_lock);
        }
        return 0;
}

/**
 * Create a gpu scheduler
 *
 * @device        The device context for this scheduler
 * @ops           The backend operations for this scheduler.
 * @ring          The scheduler is per ring, this is the ring id.
 * @granularity   The minimum time unit in ms at which the scheduler schedules.
 * @preemption    Indicates whether this ring supports preemption, 0 means no.
 * @hw_submission The max number of jobs queued to the HW ring at the same time.
 *
 * Return the pointer to the scheduler on success, otherwise return NULL.
 */
struct amd_gpu_scheduler *amd_sched_create(void *device,
                                           struct amd_sched_backend_ops *ops,
                                           unsigned ring,
                                           unsigned granularity,
                                           unsigned preemption,
                                           unsigned hw_submission)
{
        struct amd_gpu_scheduler *sched;
        char name[20];

        sched = kzalloc(sizeof(struct amd_gpu_scheduler), GFP_KERNEL);
        if (!sched)
                return NULL;

        sched->device = device;
        sched->ops = ops;
        sched->granularity = granularity;
        sched->ring_id = ring;
        sched->preemption = preemption;
        sched->hw_submission_limit = hw_submission;
        snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
        mutex_init(&sched->sched_lock);
        spin_lock_init(&sched->queue_lock);
        init_rq(&sched->sched_rq);
        sched->sched_rq.check_entity_status = gpu_entity_check_status;

        init_rq(&sched->kernel_rq);
        sched->kernel_rq.check_entity_status = gpu_entity_check_status;

        init_waitqueue_head(&sched->wait_queue);
        INIT_LIST_HEAD(&sched->active_hw_rq);
        atomic64_set(&sched->hw_rq_count, 0);
        /* Each scheduler will run on a separate kernel thread */
        sched->thread = kthread_create(amd_sched_main, sched, name);
        if (IS_ERR(sched->thread)) {
                DRM_ERROR("Failed to create scheduler for id %d.\n", ring);
                kfree(sched);
                return NULL;
        }

        wake_up_process(sched->thread);
        return sched;
}

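/*
 * Illustrative sketch: driver-side creation of one scheduler per ring.
 * The ops table members mirror the callbacks this file invokes
 * (prepare_job, run_job, process_job); the example_* names and the
 * hw_submission depth of 16 are hypothetical.
 */
#if 0
static struct amd_sched_backend_ops example_ops = {
        .prepare_job = example_prepare_job,
        .run_job     = example_run_job,
        .process_job = example_process_job,
};

static struct amd_gpu_scheduler *example_ring_sched_init(void *device,
                                                         unsigned ring_id)
{
        /* granularity 1 ms, no preemption, at most 16 jobs on the HW queue */
        return amd_sched_create(device, &example_ops, ring_id, 1, 0, 16);
}
#endif
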
/**
 * Destroy a gpu scheduler
 *
 * @sched       The pointer to the scheduler
 *
 * Return 0 on success.
 */
int amd_sched_destroy(struct amd_gpu_scheduler *sched)
{
        kthread_stop(sched->thread);
        kfree(sched);
        return 0;
}

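/*
 * Illustrative sketch: teardown order.  Entities are drained and removed
 * from their run queues first, then the scheduler thread is stopped.  The
 * helper name example_ring_sched_fini is hypothetical.
 */
#if 0
static void example_ring_sched_fini(struct amd_gpu_scheduler *sched,
                                    struct amd_sched_entity *entity)
{
        /* waits (with a timeout) for the entity's job queue to drain */
        amd_sched_entity_fini(sched, entity);
        /* then stop the kthread and free the scheduler */
        amd_sched_destroy(sched);
}
#endif
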
/**
 * Update the emitted sequence and wake up the waiters, called by run_job
 * on the driver side.
 *
 * @c_entity The context entity
 * @seq The sequence number for the latest emitted job
 */
void amd_sched_emit(struct amd_sched_entity *c_entity, uint64_t seq)
{
        atomic64_set(&c_entity->last_emitted_v_seq, seq);
        wake_up_all(&c_entity->wait_emit);
}

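/*
 * Illustrative sketch: a backend run_job callback that submits the IB to
 * the HW ring and reports the emitted virtual sequence number back to the
 * scheduler via amd_sched_emit().  example_submit_to_ring is a hypothetical
 * driver helper returning the HW fence for the submission.
 */
#if 0
static struct fence *example_run_job(struct amd_gpu_scheduler *sched,
                                     struct amd_sched_entity *entity,
                                     struct amd_sched_job *job)
{
        struct fence *hw_fence;

        hw_fence = example_submit_to_ring(job->data);

        /* wakes anyone blocked in amd_sched_wait_emit() on this entity */
        amd_sched_emit(entity, job->s_fence->v_seq);

        return hw_fence;
}
#endif
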
/**
 * Get the next queued sequence number
 *
 * @c_entity The context entity
 *
 * Return the next queued sequence number.
 */
uint64_t amd_sched_next_queued_seq(struct amd_sched_entity *c_entity)
{
        return atomic64_read(&c_entity->last_queued_v_seq) + 1;
}