drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

   1 /*
   2  * Copyright 2009 Jerome Glisse.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  16  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  17  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  18  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  19  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  20  *
  21  * The above copyright notice and this permission notice (including the
  22  * next paragraph) shall be included in all copies or substantial portions
  23  * of the Software.
  24  *
  25  */
  26 /*
  27  * Authors:
  28  *    Jerome Glisse <glisse@freedesktop.org>
  29  *    Dave Airlie
  30  */
  31 #include <linux/seq_file.h>
  32 #include <linux/atomic.h>
  33 #include <linux/wait.h>
  34 #include <linux/kref.h>
  35 #include <linux/slab.h>
  36 #include <linux/firmware.h>
  37 #include <drm/drmP.h>
  38 #include "amdgpu.h"
  39 #include "amdgpu_trace.h"
  40
  41 /*
  42  * Fences
  43  * Fences mark an event in the GPUs pipeline and are used
  44  * for GPU/CPU synchronization.  When the fence is written,
  45  * it is expected that all buffers associated with that fence
  46  * are no longer in use by the associated ring on the GPU and
  47  * that the relevant GPU caches have been flushed.
  48  */
  49
  50 /**
  51  * amdgpu_fence_write - write a fence value
  52  *
  53  * @ring: ring the fence is associated with
  54  * @seq: sequence number to write
  55  *
  56  * Writes a fence value to memory (all asics).
  57  */
  58 static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
  59 {
  60         struct amdgpu_fence_driver *drv = &ring->fence_drv;
  61
  62         if (drv->cpu_addr)
  63                 *drv->cpu_addr = cpu_to_le32(seq);
  64 }
  65
  66 /**
  67  * amdgpu_fence_read - read a fence value
  68  *
  69  * @ring: ring the fence is associated with
  70  *
  71  * Reads a fence value from memory (all asics).
  72  * Returns the value of the fence read from memory.
  73  */
  74 static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
  75 {
  76         struct amdgpu_fence_driver *drv = &ring->fence_drv;
  77         u32 seq = 0;
  78
  79         if (drv->cpu_addr)
  80                 seq = le32_to_cpu(*drv->cpu_addr);
  81         else
  82                 seq = lower_32_bits(atomic64_read(&drv->last_seq));
  83
  84         return seq;
  85 }
  86
  87 /**
  88  * amdgpu_fence_schedule_check - schedule lockup check
  89  *
  90  * @ring: pointer to struct amdgpu_ring
  91  *
  92  * Queues a delayed work item to check for lockups.
  93  */
  94 static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
  95 {
  96         /*
  97          * Do not reset the timer here with mod_delayed_work,
  98          * this can livelock in an interaction with TTM delayed destroy.
  99          */
 100         queue_delayed_work(system_power_efficient_wq,
 101                 &ring->fence_drv.lockup_work,
 102                 AMDGPU_FENCE_JIFFIES_TIMEOUT);
 103 }
 104
 105 /**
 106  * amdgpu_fence_emit - emit a fence on the requested ring
 107  *
 108  * @ring: ring the fence is associated with
 109  * @owner: creator of the fence
 110  * @fence: amdgpu fence object
 111  *
 112  * Emits a fence command on the requested ring (all asics).
 113  * Returns 0 on success, -ENOMEM on failure.
 114  */
 115 int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 116                       struct amdgpu_fence **fence)
 117 {
 118         struct amdgpu_device *adev = ring->adev;
 119
 120         /* we are protected by the ring emission mutex */
 121         *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
 122         if ((*fence) == NULL) {
 123                 return -ENOMEM;
 124         }
 125         (*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
 126         (*fence)->ring = ring;
 127         (*fence)->owner = owner;
 128         fence_init(&(*fence)->base, &amdgpu_fence_ops,
 129                 &adev->fence_queue.lock, adev->fence_context + ring->idx,
 130                 (*fence)->seq);
 131         amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
 132                                (*fence)->seq,
 133                                AMDGPU_FENCE_FLAG_INT);
 134         trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
 135         return 0;
 136 }
 137
 138 /**
 139  * amdgpu_fence_recreate - recreate a fence from an user fence
 140  *
 141  * @ring: ring the fence is associated with
 142  * @owner: creator of the fence
 143  * @seq: user fence sequence number
 144  * @fence: resulting amdgpu fence object
 145  *
 146  * Recreates a fence command from the user fence sequence number (all asics).
 147  * Returns 0 on success, -ENOMEM on failure.
 148  */
 149 int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
 150                           uint64_t seq, struct amdgpu_fence **fence)
 151 {
 152         struct amdgpu_device *adev = ring->adev;
 153
 154         if (seq > ring->fence_drv.sync_seq[ring->idx])
 155                 return -EINVAL;
 156
 157         *fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
 158         if ((*fence) == NULL)
 159                 return -ENOMEM;
 160
 161         (*fence)->seq = seq;
 162         (*fence)->ring = ring;
 163         (*fence)->owner = owner;
 164         fence_init(&(*fence)->base, &amdgpu_fence_ops,
 165                 &adev->fence_queue.lock, adev->fence_context + ring->idx,
 166                 (*fence)->seq);
 167         return 0;
 168 }
 169
 170 /**
 171  * amdgpu_fence_check_signaled - callback from fence_queue
 172  *
 173  * this function is called with fence_queue lock held, which is also used
 174  * for the fence locking itself, so unlocked variants are used for
 175  * fence_signal, and remove_wait_queue.
 176  */
 177 static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
 178 {
 179         struct amdgpu_fence *fence;
 180         struct amdgpu_device *adev;
 181         u64 seq;
 182         int ret;
 183
 184         fence = container_of(wait, struct amdgpu_fence, fence_wake);
 185         adev = fence->ring->adev;
 186
 187         /*
 188          * We cannot use amdgpu_fence_process here because we're already
 189          * in the waitqueue, in a call from wake_up_all.
 190          */
 191         seq = atomic64_read(&fence->ring->fence_drv.last_seq);
 192         if (seq >= fence->seq) {
 193                 ret = fence_signal_locked(&fence->base);
 194                 if (!ret)
 195                         FENCE_TRACE(&fence->base, "signaled from irq context\n");
 196                 else
 197                         FENCE_TRACE(&fence->base, "was already signaled\n");
 198
 199                 amdgpu_irq_put(adev, fence->ring->fence_drv.irq_src,
 200                                 fence->ring->fence_drv.irq_type);
 201                 __remove_wait_queue(&adev->fence_queue, &fence->fence_wake);
 202                 fence_put(&fence->base);
 203         } else
 204                 FENCE_TRACE(&fence->base, "pending\n");
 205         return 0;
 206 }
 207
 208 /**
 209  * amdgpu_fence_activity - check for fence activity
 210  *
 211  * @ring: pointer to struct amdgpu_ring
 212  *
 213  * Checks the current fence value and calculates the last
 214  * signalled fence value. Returns true if activity occured
 215  * on the ring, and the fence_queue should be waken up.
 216  */
 217 static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
 218 {
 219         uint64_t seq, last_seq, last_emitted;
 220         unsigned count_loop = 0;
 221         bool wake = false;
 222
 223         /* Note there is a scenario here for an infinite loop but it's
 224          * very unlikely to happen. For it to happen, the current polling
 225          * process need to be interrupted by another process and another
 226          * process needs to update the last_seq btw the atomic read and
 227          * xchg of the current process.
 228          *
 229          * More over for this to go in infinite loop there need to be
 230          * continuously new fence signaled ie amdgpu_fence_read needs
 231          * to return a different value each time for both the currently
 232          * polling process and the other process that xchg the last_seq
 233          * btw atomic read and xchg of the current process. And the
 234          * value the other process set as last seq must be higher than
 235          * the seq value we just read. Which means that current process
 236          * need to be interrupted after amdgpu_fence_read and before
 237          * atomic xchg.
 238          *
 239          * To be even more safe we count the number of time we loop and
 240          * we bail after 10 loop just accepting the fact that we might
 241          * have temporarly set the last_seq not to the true real last
 242          * seq but to an older one.
 243          */
 244         last_seq = atomic64_read(&ring->fence_drv.last_seq);
 245         do {
 246                 last_emitted = ring->fence_drv.sync_seq[ring->idx];
 247                 seq = amdgpu_fence_read(ring);
 248                 seq |= last_seq & 0xffffffff00000000LL;
 249                 if (seq < last_seq) {
 250                         seq &= 0xffffffff;
 251                         seq |= last_emitted & 0xffffffff00000000LL;
 252                 }
 253
 254                 if (seq <= last_seq || seq > last_emitted) {
 255                         break;
 256                 }
 257                 /* If we loop over we don't want to return without
 258                  * checking if a fence is signaled as it means that the
 259                  * seq we just read is different from the previous on.
 260                  */
 261                 wake = true;
 262                 last_seq = seq;
 263                 if ((count_loop++) > 10) {
 264                         /* We looped over too many time leave with the
 265                          * fact that we might have set an older fence
 266                          * seq then the current real last seq as signaled
 267                          * by the hw.
 268                          */
 269                         break;
 270                 }
 271         } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
 272
 273         if (seq < last_emitted)
 274                 amdgpu_fence_schedule_check(ring);
 275
 276         return wake;
 277 }
 278
 279 /**
 280  * amdgpu_fence_check_lockup - check for hardware lockup
 281  *
 282  * @work: delayed work item
 283  *
 284  * Checks for fence activity and if there is none probe
 285  * the hardware if a lockup occured.
 286  */
 287 static void amdgpu_fence_check_lockup(struct work_struct *work)
 288 {
 289         struct amdgpu_fence_driver *fence_drv;
 290         struct amdgpu_ring *ring;
 291
 292         fence_drv = container_of(work, struct amdgpu_fence_driver,
 293                                 lockup_work.work);
 294         ring = fence_drv->ring;
 295
 296         if (!down_read_trylock(&ring->adev->exclusive_lock)) {
 297                 /* just reschedule the check if a reset is going on */
 298                 amdgpu_fence_schedule_check(ring);
 299                 return;
 300         }
 301
 302         if (fence_drv->delayed_irq && ring->adev->ddev->irq_enabled) {
 303                 fence_drv->delayed_irq = false;
 304                 amdgpu_irq_update(ring->adev, fence_drv->irq_src,
 305                                 fence_drv->irq_type);
 306         }
 307
 308         if (amdgpu_fence_activity(ring))
 309                 wake_up_all(&ring->adev->fence_queue);
 310         else if (amdgpu_ring_is_lockup(ring)) {
 311                 /* good news we believe it's a lockup */
 312                 dev_warn(ring->adev->dev, "GPU lockup (current fence id "
 313                         "0x%016llx last fence id 0x%016llx on ring %d)\n",
 314                         (uint64_t)atomic64_read(&fence_drv->last_seq),
 315                         fence_drv->sync_seq[ring->idx], ring->idx);
 316
 317                 /* remember that we need an reset */
 318                 ring->adev->needs_reset = true;
 319                 wake_up_all(&ring->adev->fence_queue);
 320         }
 321         up_read(&ring->adev->exclusive_lock);
 322 }
 323
 324 /**
 325  * amdgpu_fence_process - process a fence
 326  *
 327  * @adev: amdgpu_device pointer
 328  * @ring: ring index the fence is associated with
 329  *
 330  * Checks the current fence value and wakes the fence queue
 331  * if the sequence number has increased (all asics).
 332  */
 333 void amdgpu_fence_process(struct amdgpu_ring *ring)
 334 {
 335         uint64_t seq, last_seq, last_emitted;
 336         unsigned count_loop = 0;
 337         bool wake = false;
 338
 339         /* Note there is a scenario here for an infinite loop but it's
 340          * very unlikely to happen. For it to happen, the current polling
 341          * process need to be interrupted by another process and another
 342          * process needs to update the last_seq btw the atomic read and
 343          * xchg of the current process.
 344          *
 345          * More over for this to go in infinite loop there need to be
 346          * continuously new fence signaled ie amdgpu_fence_read needs
 347          * to return a different value each time for both the currently
 348          * polling process and the other process that xchg the last_seq
 349          * btw atomic read and xchg of the current process. And the
 350          * value the other process set as last seq must be higher than
 351          * the seq value we just read. Which means that current process
 352          * need to be interrupted after amdgpu_fence_read and before
 353          * atomic xchg.
 354          *
 355          * To be even more safe we count the number of time we loop and
 356          * we bail after 10 loop just accepting the fact that we might
 357          * have temporarly set the last_seq not to the true real last
 358          * seq but to an older one.
 359          */
 360         last_seq = atomic64_read(&ring->fence_drv.last_seq);
 361         do {
 362                 last_emitted = ring->fence_drv.sync_seq[ring->idx];
 363                 seq = amdgpu_fence_read(ring);
 364                 seq |= last_seq & 0xffffffff00000000LL;
 365                 if (seq < last_seq) {
 366                         seq &= 0xffffffff;
 367                         seq |= last_emitted & 0xffffffff00000000LL;
 368                 }
 369
 370                 if (seq <= last_seq || seq > last_emitted) {
 371                         break;
 372                 }
 373                 /* If we loop over we don't want to return without
 374                  * checking if a fence is signaled as it means that the
 375                  * seq we just read is different from the previous on.
 376                  */
 377                 wake = true;
 378                 last_seq = seq;
 379                 if ((count_loop++) > 10) {
 380                         /* We looped over too many time leave with the
 381                          * fact that we might have set an older fence
 382                          * seq then the current real last seq as signaled
 383                          * by the hw.
 384                          */
 385                         break;
 386                 }
 387         } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
 388
 389         if (wake)
 390                 wake_up_all(&ring->adev->fence_queue);
 391 }
 392
 393 /**
 394  * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
 395  *
 396  * @ring: ring the fence is associated with
 397  * @seq: sequence number
 398  *
 399  * Check if the last signaled fence sequnce number is >= the requested
 400  * sequence number (all asics).
 401  * Returns true if the fence has signaled (current fence value
 402  * is >= requested value) or false if it has not (current fence
 403  * value is < the requested value.  Helper function for
 404  * amdgpu_fence_signaled().
 405  */
 406 static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
 407 {
 408         if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
 409                 return true;
 410
 411         /* poll new last sequence at least once */
 412         amdgpu_fence_process(ring);
 413         if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
 414                 return true;
 415
 416         return false;
 417 }
 418
 419 static bool amdgpu_fence_is_signaled(struct fence *f)
 420 {
 421         struct amdgpu_fence *fence = to_amdgpu_fence(f);
 422         struct amdgpu_ring *ring = fence->ring;
 423         struct amdgpu_device *adev = ring->adev;
 424
 425         if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
 426                 return true;
 427
 428         if (down_read_trylock(&adev->exclusive_lock)) {
 429                 amdgpu_fence_process(ring);
 430                 up_read(&adev->exclusive_lock);
 431
 432                 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
 433                         return true;
 434         }
 435         return false;
 436 }
 437
 438 /**
 439  * amdgpu_fence_enable_signaling - enable signalling on fence
 440  * @fence: fence
 441  *
 442  * This function is called with fence_queue lock held, and adds a callback
 443  * to fence_queue that checks if this fence is signaled, and if so it
 444  * signals the fence and removes itself.
 445  */
 446 static bool amdgpu_fence_enable_signaling(struct fence *f)
 447 {
 448         struct amdgpu_fence *fence = to_amdgpu_fence(f);
 449         struct amdgpu_ring *ring = fence->ring;
 450         struct amdgpu_device *adev = ring->adev;
 451
 452         if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
 453                 return false;
 454
 455         if (down_read_trylock(&adev->exclusive_lock)) {
 456                 amdgpu_irq_get(adev, ring->fence_drv.irq_src,
 457                         ring->fence_drv.irq_type);
 458                 if (amdgpu_fence_activity(ring))
 459                         wake_up_all_locked(&adev->fence_queue);
 460
 461                 /* did fence get signaled after we enabled the sw irq? */
 462                 if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq) {
 463                         amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 464                                 ring->fence_drv.irq_type);
 465                         up_read(&adev->exclusive_lock);
 466                         return false;
 467                 }
 468
 469                 up_read(&adev->exclusive_lock);
 470         } else {
 471                 /* we're probably in a lockup, lets not fiddle too much */
 472                 if (amdgpu_irq_get_delayed(adev, ring->fence_drv.irq_src,
 473                         ring->fence_drv.irq_type))
 474                         ring->fence_drv.delayed_irq = true;
 475                 amdgpu_fence_schedule_check(ring);
 476         }
 477
 478         fence->fence_wake.flags = 0;
 479         fence->fence_wake.private = NULL;
 480         fence->fence_wake.func = amdgpu_fence_check_signaled;
 481         __add_wait_queue(&adev->fence_queue, &fence->fence_wake);
 482         fence_get(f);
 483         FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 484         return true;
 485 }
 486
 487 /**
 488  * amdgpu_fence_signaled - check if a fence has signaled
 489  *
 490  * @fence: amdgpu fence object
 491  *
 492  * Check if the requested fence has signaled (all asics).
 493  * Returns true if the fence has signaled or false if it has not.
 494  */
 495 bool amdgpu_fence_signaled(struct amdgpu_fence *fence)
 496 {
 497         if (!fence)
 498                 return true;
 499
 500         if (amdgpu_fence_seq_signaled(fence->ring, fence->seq)) {
 501                 if (!fence_signal(&fence->base))
 502                         FENCE_TRACE(&fence->base, "signaled from amdgpu_fence_signaled\n");
 503                 return true;
 504         }
 505
 506         return false;
 507 }
 508
 509 /**
 510  * amdgpu_fence_any_seq_signaled - check if any sequence number is signaled
 511  *
 512  * @adev: amdgpu device pointer
 513  * @seq: sequence numbers
 514  *
 515  * Check if the last signaled fence sequnce number is >= the requested
 516  * sequence number (all asics).
 517  * Returns true if any has signaled (current value is >= requested value)
 518  * or false if it has not. Helper function for amdgpu_fence_wait_seq.
 519  */
 520 static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
 521 {
 522         unsigned i;
 523
 524         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 525                 if (!adev->rings[i] || !seq[i])
 526                         continue;
 527
 528                 if (amdgpu_fence_seq_signaled(adev->rings[i], seq[i]))
 529                         return true;
 530         }
 531
 532         return false;
 533 }
 534
 535 /**
 536  * amdgpu_fence_wait_seq_timeout - wait for a specific sequence numbers
 537  *
 538  * @adev: amdgpu device pointer
 539  * @target_seq: sequence number(s) we want to wait for
 540  * @intr: use interruptable sleep
 541  * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 542  *
 543  * Wait for the requested sequence number(s) to be written by any ring
 544  * (all asics).  Sequnce number array is indexed by ring id.
 545  * @intr selects whether to use interruptable (true) or non-interruptable
 546  * (false) sleep when waiting for the sequence number.  Helper function
 547  * for amdgpu_fence_wait_*().
 548  * Returns remaining time if the sequence number has passed, 0 when
 549  * the wait timeout, or an error for all other cases.
 550  * -EDEADLK is returned when a GPU lockup has been detected.
 551  */
 552 static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
 553                                           u64 *target_seq, bool intr,
 554                                           long timeout)
 555 {
 556         uint64_t last_seq[AMDGPU_MAX_RINGS];
 557         bool signaled;
 558         int i;
 559         long r;
 560
 561         if (timeout == 0) {
 562                 return amdgpu_fence_any_seq_signaled(adev, target_seq);
 563         }
 564
 565         while (!amdgpu_fence_any_seq_signaled(adev, target_seq)) {
 566
 567                 /* Save current sequence values, used to check for GPU lockups */
 568                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 569                         struct amdgpu_ring *ring = adev->rings[i];
 570
 571                         if (!ring || !target_seq[i])
 572                                 continue;
 573
 574                         last_seq[i] = atomic64_read(&ring->fence_drv.last_seq);
 575                         trace_amdgpu_fence_wait_begin(adev->ddev, i, target_seq[i]);
 576                         amdgpu_irq_get(adev, ring->fence_drv.irq_src,
 577                                        ring->fence_drv.irq_type);
 578                 }
 579
 580                 if (intr) {
 581                         r = wait_event_interruptible_timeout(adev->fence_queue, (
 582                                 (signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
 583                                  || adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
 584                 } else {
 585                         r = wait_event_timeout(adev->fence_queue, (
 586                                 (signaled = amdgpu_fence_any_seq_signaled(adev, target_seq))
 587                                  || adev->needs_reset), AMDGPU_FENCE_JIFFIES_TIMEOUT);
 588                 }
 589
 590                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 591                         struct amdgpu_ring *ring = adev->rings[i];
 592
 593                         if (!ring || !target_seq[i])
 594                                 continue;
 595
 596                         amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 597                                        ring->fence_drv.irq_type);
 598                         trace_amdgpu_fence_wait_end(adev->ddev, i, target_seq[i]);
 599                 }
 600
 601                 if (unlikely(r < 0))
 602                         return r;
 603
 604                 if (unlikely(!signaled)) {
 605
 606                         if (adev->needs_reset)
 607                                 return -EDEADLK;
 608
 609                         /* we were interrupted for some reason and fence
 610                          * isn't signaled yet, resume waiting */
 611                         if (r)
 612                                 continue;
 613
 614                         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 615                                 struct amdgpu_ring *ring = adev->rings[i];
 616
 617                                 if (!ring || !target_seq[i])
 618                                         continue;
 619
 620                                 if (last_seq[i] != atomic64_read(&ring->fence_drv.last_seq))
 621                                         break;
 622                         }
 623
 624                         if (i != AMDGPU_MAX_RINGS)
 625                                 continue;
 626
 627                         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 628                                 if (!adev->rings[i] || !target_seq[i])
 629                                         continue;
 630
 631                                 if (amdgpu_ring_is_lockup(adev->rings[i]))
 632                                         break;
 633                         }
 634
 635                         if (i < AMDGPU_MAX_RINGS) {
 636                                 /* good news we believe it's a lockup */
 637                                 dev_warn(adev->dev, "GPU lockup (waiting for "
 638                                          "0x%016llx last fence id 0x%016llx on"
 639                                          " ring %d)\n",
 640                                          target_seq[i], last_seq[i], i);
 641
 642                                 /* remember that we need an reset */
 643                                 adev->needs_reset = true;
 644                                 wake_up_all(&adev->fence_queue);
 645                                 return -EDEADLK;
 646                         }
 647
 648                         if (timeout < MAX_SCHEDULE_TIMEOUT) {
 649                                 timeout -= AMDGPU_FENCE_JIFFIES_TIMEOUT;
 650                                 if (timeout <= 0) {
 651                                         return 0;
 652                                 }
 653                         }
 654                 }
 655         }
 656         return timeout;
 657 }
 658
 659 /**
 660  * amdgpu_fence_wait - wait for a fence to signal
 661  *
 662  * @fence: amdgpu fence object
 663  * @intr: use interruptable sleep
 664  *
 665  * Wait for the requested fence to signal (all asics).
 666  * @intr selects whether to use interruptable (true) or non-interruptable
 667  * (false) sleep when waiting for the fence.
 668  * Returns 0 if the fence has passed, error for all other cases.
 669  */
 670 int amdgpu_fence_wait(struct amdgpu_fence *fence, bool intr)
 671 {
 672         uint64_t seq[AMDGPU_MAX_RINGS] = {};
 673         long r;
 674
 675         seq[fence->ring->idx] = fence->seq;
 676         r = amdgpu_fence_wait_seq_timeout(fence->ring->adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
 677         if (r < 0) {
 678                 return r;
 679         }
 680
 681         r = fence_signal(&fence->base);
 682         if (!r)
 683                 FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
 684         return 0;
 685 }
 686
 687 /**
 688  * amdgpu_fence_wait_any - wait for a fence to signal on any ring
 689  *
 690  * @adev: amdgpu device pointer
 691  * @fences: amdgpu fence object(s)
 692  * @intr: use interruptable sleep
 693  *
 694  * Wait for any requested fence to signal (all asics).  Fence
 695  * array is indexed by ring id.  @intr selects whether to use
 696  * interruptable (true) or non-interruptable (false) sleep when
 697  * waiting for the fences. Used by the suballocator.
 698  * Returns 0 if any fence has passed, error for all other cases.
 699  */
 700 int amdgpu_fence_wait_any(struct amdgpu_device *adev,
 701                           struct amdgpu_fence **fences,
 702                           bool intr)
 703 {
 704         uint64_t seq[AMDGPU_MAX_RINGS];
 705         unsigned i, num_rings = 0;
 706         long r;
 707
 708         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 709                 seq[i] = 0;
 710
 711                 if (!fences[i]) {
 712                         continue;
 713                 }
 714
 715                 seq[i] = fences[i]->seq;
 716                 ++num_rings;
 717         }
 718
 719         /* nothing to wait for ? */
 720         if (num_rings == 0)
 721                 return -ENOENT;
 722
 723         r = amdgpu_fence_wait_seq_timeout(adev, seq, intr, MAX_SCHEDULE_TIMEOUT);
 724         if (r < 0) {
 725                 return r;
 726         }
 727         return 0;
 728 }
 729
 730 /**
 731  * amdgpu_fence_wait_next - wait for the next fence to signal
 732  *
 733  * @adev: amdgpu device pointer
 734  * @ring: ring index the fence is associated with
 735  *
 736  * Wait for the next fence on the requested ring to signal (all asics).
 737  * Returns 0 if the next fence has passed, error for all other cases.
 738  * Caller must hold ring lock.
 739  */
 740 int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
 741 {
 742         uint64_t seq[AMDGPU_MAX_RINGS] = {};
 743         long r;
 744
 745         seq[ring->idx] = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
 746         if (seq[ring->idx] >= ring->fence_drv.sync_seq[ring->idx]) {
 747                 /* nothing to wait for, last_seq is
 748                    already the last emited fence */
 749                 return -ENOENT;
 750         }
 751         r = amdgpu_fence_wait_seq_timeout(ring->adev, seq, false, MAX_SCHEDULE_TIMEOUT);
 752         if (r < 0)
 753                 return r;
 754         return 0;
 755 }
 756
 757 /**
 758  * amdgpu_fence_wait_empty - wait for all fences to signal
 759  *
 760  * @adev: amdgpu device pointer
 761  * @ring: ring index the fence is associated with
 762  *
 763  * Wait for all fences on the requested ring to signal (all asics).
 764  * Returns 0 if the fences have passed, error for all other cases.
 765  * Caller must hold ring lock.
 766  */
 767 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 768 {
 769         struct amdgpu_device *adev = ring->adev;
 770         uint64_t seq[AMDGPU_MAX_RINGS] = {};
 771         long r;
 772
 773         seq[ring->idx] = ring->fence_drv.sync_seq[ring->idx];
 774         if (!seq[ring->idx])
 775                 return 0;
 776
 777         r = amdgpu_fence_wait_seq_timeout(adev, seq, false, MAX_SCHEDULE_TIMEOUT);
 778         if (r < 0) {
 779                 if (r == -EDEADLK)
 780                         return -EDEADLK;
 781
 782                 dev_err(adev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
 783                         ring->idx, r);
 784         }
 785         return 0;
 786 }
 787
 788 /**
 789  * amdgpu_fence_ref - take a ref on a fence
 790  *
 791  * @fence: amdgpu fence object
 792  *
 793  * Take a reference on a fence (all asics).
 794  * Returns the fence.
 795  */
 796 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
 797 {
 798         fence_get(&fence->base);
 799         return fence;
 800 }
 801
 802 /**
 803  * amdgpu_fence_unref - remove a ref on a fence
 804  *
 805  * @fence: amdgpu fence object
 806  *
 807  * Remove a reference on a fence (all asics).
 808  */
 809 void amdgpu_fence_unref(struct amdgpu_fence **fence)
 810 {
 811         struct amdgpu_fence *tmp = *fence;
 812
 813         *fence = NULL;
 814         if (tmp)
 815                 fence_put(&tmp->base);
 816 }
 817
 818 /**
 819  * amdgpu_fence_count_emitted - get the count of emitted fences
 820  *
 821  * @ring: ring the fence is associated with
 822  *
 823  * Get the number of fences emitted on the requested ring (all asics).
 824  * Returns the number of emitted fences on the ring.  Used by the
 825  * dynpm code to ring track activity.
 826  */
 827 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 828 {
 829         uint64_t emitted;
 830
 831         /* We are not protected by ring lock when reading the last sequence
 832          * but it's ok to report slightly wrong fence count here.
 833          */
 834         amdgpu_fence_process(ring);
 835         emitted = ring->fence_drv.sync_seq[ring->idx]
 836                 - atomic64_read(&ring->fence_drv.last_seq);
 837         /* to avoid 32bits warp around */
 838         if (emitted > 0x10000000)
 839                 emitted = 0x10000000;
 840
 841         return (unsigned)emitted;
 842 }
 843
 844 /**
 845  * amdgpu_fence_need_sync - do we need a semaphore
 846  *
 847  * @fence: amdgpu fence object
 848  * @dst_ring: which ring to check against
 849  *
 850  * Check if the fence needs to be synced against another ring
 851  * (all asics).  If so, we need to emit a semaphore.
 852  * Returns true if we need to sync with another ring, false if
 853  * not.
 854  */
 855 bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
 856                             struct amdgpu_ring *dst_ring)
 857 {
 858         struct amdgpu_fence_driver *fdrv;
 859
 860         if (!fence)
 861                 return false;
 862
 863         if (fence->ring == dst_ring)
 864                 return false;
 865
 866         /* we are protected by the ring mutex */
 867         fdrv = &dst_ring->fence_drv;
 868         if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
 869                 return false;
 870
 871         return true;
 872 }
 873
 874 /**
 875  * amdgpu_fence_note_sync - record the sync point
 876  *
 877  * @fence: amdgpu fence object
 878  * @dst_ring: which ring to check against
 879  *
 880  * Note the sequence number at which point the fence will
 881  * be synced with the requested ring (all asics).
 882  */
 883 void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
 884                             struct amdgpu_ring *dst_ring)
 885 {
 886         struct amdgpu_fence_driver *dst, *src;
 887         unsigned i;
 888
 889         if (!fence)
 890                 return;
 891
 892         if (fence->ring == dst_ring)
 893                 return;
 894
 895         /* we are protected by the ring mutex */
 896         src = &fence->ring->fence_drv;
 897         dst = &dst_ring->fence_drv;
 898         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 899                 if (i == dst_ring->idx)
 900                         continue;
 901
 902                 dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
 903         }
 904 }
 905
 906 /**
 907  * amdgpu_fence_driver_start_ring - make the fence driver
 908  * ready for use on the requested ring.
 909  *
 910  * @ring: ring to start the fence driver on
 911  * @irq_src: interrupt source to use for this ring
 912  * @irq_type: interrupt type to use for this ring
 913  *
 914  * Make the fence driver ready for processing (all asics).
 915  * Not all asics have all rings, so each asic will only
 916  * start the fence driver on the rings it has.
 917  * Returns 0 for success, errors for failure.
 918  */
 919 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 920                                    struct amdgpu_irq_src *irq_src,
 921                                    unsigned irq_type)
 922 {
 923         struct amdgpu_device *adev = ring->adev;
 924         uint64_t index;
 925
 926         if (ring != &adev->uvd.ring) {
 927                 ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
 928                 ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
 929         } else {
 930                 /* put fence directly behind firmware */
 931                 index = ALIGN(adev->uvd.fw->size, 8);
 932                 ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
 933                 ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
 934         }
 935         amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
 936         ring->fence_drv.initialized = true;
 937         ring->fence_drv.irq_src = irq_src;
 938         ring->fence_drv.irq_type = irq_type;
 939         dev_info(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, "
 940                  "cpu addr 0x%p\n", ring->idx,
 941                  ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr);
 942         return 0;
 943 }
 944
 945 /**
 946  * amdgpu_fence_driver_init_ring - init the fence driver
 947  * for the requested ring.
 948  *
 949  * @ring: ring to init the fence driver on
 950  *
 951  * Init the fence driver for the requested ring (all asics).
 952  * Helper function for amdgpu_fence_driver_init().
 953  */
 954 void amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 955 {
 956         int i;
 957
 958         ring->fence_drv.cpu_addr = NULL;
 959         ring->fence_drv.gpu_addr = 0;
 960         for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 961                 ring->fence_drv.sync_seq[i] = 0;
 962
 963         atomic64_set(&ring->fence_drv.last_seq, 0);
 964         ring->fence_drv.initialized = false;
 965
 966         INIT_DELAYED_WORK(&ring->fence_drv.lockup_work,
 967                         amdgpu_fence_check_lockup);
 968         ring->fence_drv.ring = ring;
 969 }
 970
 971 /**
 972  * amdgpu_fence_driver_init - init the fence driver
 973  * for all possible rings.
 974  *
 975  * @adev: amdgpu device pointer
 976  *
 977  * Init the fence driver for all possible rings (all asics).
 978  * Not all asics have all rings, so each asic will only
 979  * start the fence driver on the rings it has using
 980  * amdgpu_fence_driver_start_ring().
 981  * Returns 0 for success.
 982  */
 983 int amdgpu_fence_driver_init(struct amdgpu_device *adev)
 984 {
 985         init_waitqueue_head(&adev->fence_queue);
 986         if (amdgpu_debugfs_fence_init(adev))
 987                 dev_err(adev->dev, "fence debugfs file creation failed\n");
 988
 989         return 0;
 990 }
 991
 992 /**
 993  * amdgpu_fence_driver_fini - tear down the fence driver
 994  * for all possible rings.
 995  *
 996  * @adev: amdgpu device pointer
 997  *
 998  * Tear down the fence driver for all possible rings (all asics).
 999  */
1000 void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
1001 {
1002         int i, r;
1003
1004         mutex_lock(&adev->ring_lock);
1005         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
1006                 struct amdgpu_ring *ring = adev->rings[i];
1007                 if (!ring || !ring->fence_drv.initialized)
1008                         continue;
1009                 r = amdgpu_fence_wait_empty(ring);
1010                 if (r) {
1011                         /* no need to trigger GPU reset as we are unloading */
1012                         amdgpu_fence_driver_force_completion(adev);
1013                 }
1014                 wake_up_all(&adev->fence_queue);
1015                 ring->fence_drv.initialized = false;
1016         }
1017         mutex_unlock(&adev->ring_lock);
1018 }
1019
1020 /**
1021  * amdgpu_fence_driver_force_completion - force all fence waiter to complete
1022  *
1023  * @adev: amdgpu device pointer
1024  *
1025  * In case of GPU reset failure make sure no process keep waiting on fence
1026  * that will never complete.
1027  */
1028 void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
1029 {
1030         int i;
1031
1032         for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
1033                 struct amdgpu_ring *ring = adev->rings[i];
1034                 if (!ring || !ring->fence_drv.initialized)
1035                         continue;
1036
1037                 amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
1038         }
1039 }
1040
1041
1042 /*
1043  * Fence debugfs
1044  */
1045 #if defined(CONFIG_DEBUG_FS)
1046 static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
1047 {
1048         struct drm_info_node *node = (struct drm_info_node *)m->private;
1049         struct drm_device *dev = node->minor->dev;
1050         struct amdgpu_device *adev = dev->dev_private;
1051         int i, j;
1052
1053         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
1054                 struct amdgpu_ring *ring = adev->rings[i];
1055                 if (!ring || !ring->fence_drv.initialized)
1056                         continue;
1057
1058                 amdgpu_fence_process(ring);
1059
1060                 seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
1061                 seq_printf(m, "Last signaled fence 0x%016llx\n",
1062                            (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
1063                 seq_printf(m, "Last emitted        0x%016llx\n",
1064                            ring->fence_drv.sync_seq[i]);
1065
1066                 for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
1067                         struct amdgpu_ring *other = adev->rings[j];
1068                         if (i != j && other && other->fence_drv.initialized &&
1069                             ring->fence_drv.sync_seq[j])
1070                                 seq_printf(m, "Last sync to ring %d 0x%016llx\n",
1071                                            j, ring->fence_drv.sync_seq[j]);
1072                 }
1073         }
1074         return 0;
1075 }
1076
1077 static struct drm_info_list amdgpu_debugfs_fence_list[] = {
1078         {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
1079 };
1080 #endif
1081
/*
 * Register the fence debugfs file for @adev.
 * Returns the result of amdgpu_debugfs_add_files() when
 * CONFIG_DEBUG_FS is enabled, 0 otherwise.
 */
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
	int r = 0;

#if defined(CONFIG_DEBUG_FS)
	r = amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
#endif

	return r;
}
1090
/* fence_ops callback: every fence of this driver reports "amdgpu". */
static const char *amdgpu_fence_get_driver_name(struct fence *fence)
{
	static const char driver_name[] = "amdgpu";

	return driver_name;
}
1095
1096 static const char *amdgpu_fence_get_timeline_name(struct fence *f)
1097 {
1098         struct amdgpu_fence *fence = to_amdgpu_fence(f);
1099         return (const char *)fence->ring->name;
1100 }
1101
1102 static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
1103 {
1104         return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
1105 }
1106
1107 struct amdgpu_wait_cb {
1108         struct fence_cb base;
1109         struct task_struct *task;
1110 };
1111
1112 static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
1113 {
1114         struct amdgpu_wait_cb *wait =
1115                 container_of(cb, struct amdgpu_wait_cb, base);
1116         wake_up_process(wait->task);
1117 }
1118
1119 static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
1120                                              signed long t)
1121 {
1122         struct amdgpu_fence *fence = to_amdgpu_fence(f);
1123         struct amdgpu_device *adev = fence->ring->adev;
1124         struct amdgpu_wait_cb cb;
1125
1126         cb.task = current;
1127
1128         if (fence_add_callback(f, &cb.base, amdgpu_fence_wait_cb))
1129                 return t;
1130
1131         while (t > 0) {
1132                 if (intr)
1133                         set_current_state(TASK_INTERRUPTIBLE);
1134                 else
1135                         set_current_state(TASK_UNINTERRUPTIBLE);
1136
1137                 /*
1138                  * amdgpu_test_signaled must be called after
1139                  * set_current_state to prevent a race with wake_up_process
1140                  */
1141                 if (amdgpu_test_signaled(fence))
1142                         break;
1143
1144                 if (adev->needs_reset) {
1145                         t = -EDEADLK;
1146                         break;
1147                 }
1148
1149                 t = schedule_timeout(t);
1150
1151                 if (t > 0 && intr && signal_pending(current))
1152                         t = -ERESTARTSYS;
1153         }
1154
1155         __set_current_state(TASK_RUNNING);
1156         fence_remove_callback(f, &cb.base);
1157
1158         return t;
1159 }
1160
1161 const struct fence_ops amdgpu_fence_ops = {
1162         .get_driver_name = amdgpu_fence_get_driver_name,
1163         .get_timeline_name = amdgpu_fence_get_timeline_name,
1164         .enable_signaling = amdgpu_fence_enable_signaling,
1165         .signaled = amdgpu_fence_is_signaled,
1166         .wait = amdgpu_fence_default_wait,
1167         .release = NULL,
1168 };