Add USB_EHCI_EXYNOS to multi_v7_defconfig
[cascardo/linux.git] / drivers / gpu / drm / radeon / radeon_kfd.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22
23 #include <linux/module.h>
24 #include <linux/fdtable.h>
25 #include <linux/uaccess.h>
26 #include <drm/drmP.h>
27 #include "radeon.h"
28 #include "cikd.h"
29 #include "cik_reg.h"
30 #include "radeon_kfd.h"
31
/*
 * Divisor used by acquire_queue() and kgd_init_pipeline() to split a flat
 * pipe index into a MEC index and a pipe-within-MEC index.
 */
#define CIK_PIPE_PER_MEC        (4)

/*
 * Bookkeeping for one buffer handed to kfd by allocate_mem().
 */
struct kgd_mem {
	struct radeon_sa_bo *sa_bo;	/* sub-allocation from rdev->kfd_bo */
	uint64_t gpu_addr;		/* value of radeon_sa_bo_gpu_addr(sa_bo) */
	void *ptr;			/* value of radeon_sa_bo_cpu_addr(sa_bo) */
};
39
40 static int init_sa_manager(struct kgd_dev *kgd, unsigned int size);
41 static void fini_sa_manager(struct kgd_dev *kgd);
42
43 static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
44                 enum kgd_memory_pool pool, struct kgd_mem **mem);
45
46 static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem);
47
48 static uint64_t get_vmem_size(struct kgd_dev *kgd);
49 static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
50
51 static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
52
53 /*
54  * Register access functions
55  */
56
57 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
58                 uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
59                 uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
60
61 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
62                                         unsigned int vmid);
63
64 static int kgd_init_memory(struct kgd_dev *kgd);
65
66 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
67                                 uint32_t hpd_size, uint64_t hpd_gpu_addr);
68
69 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
70                         uint32_t queue_id, uint32_t __user *wptr);
71
72 static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address,
73                                 uint32_t pipe_id, uint32_t queue_id);
74
75 static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
76                                 unsigned int timeout, uint32_t pipe_id,
77                                 uint32_t queue_id);
78
79 static const struct kfd2kgd_calls kfd2kgd = {
80         .init_sa_manager = init_sa_manager,
81         .fini_sa_manager = fini_sa_manager,
82         .allocate_mem = allocate_mem,
83         .free_mem = free_mem,
84         .get_vmem_size = get_vmem_size,
85         .get_gpu_clock_counter = get_gpu_clock_counter,
86         .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
87         .program_sh_mem_settings = kgd_program_sh_mem_settings,
88         .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
89         .init_memory = kgd_init_memory,
90         .init_pipeline = kgd_init_pipeline,
91         .hqd_load = kgd_hqd_load,
92         .hqd_is_occupies = kgd_hqd_is_occupies,
93         .hqd_destroy = kgd_hqd_destroy,
94 };
95
/*
 * Callback table filled in by kgd2kfd_init(); stays NULL when the kfd symbol
 * is unavailable or the init call fails (see radeon_kfd_init()).
 */
static const struct kgd2kfd_calls *kgd2kfd;
97
98 bool radeon_kfd_init(void)
99 {
100         bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*,
101                                 const struct kgd2kfd_calls**);
102
103         kgd2kfd_init_p = symbol_request(kgd2kfd_init);
104
105         if (kgd2kfd_init_p == NULL)
106                 return false;
107
108         if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) {
109                 symbol_put(kgd2kfd_init);
110                 kgd2kfd = NULL;
111
112                 return false;
113         }
114
115         return true;
116 }
117
118 void radeon_kfd_fini(void)
119 {
120         if (kgd2kfd) {
121                 kgd2kfd->exit();
122                 symbol_put(kgd2kfd_init);
123         }
124 }
125
126 void radeon_kfd_device_probe(struct radeon_device *rdev)
127 {
128         if (kgd2kfd)
129                 rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, rdev->pdev);
130 }
131
132 void radeon_kfd_device_init(struct radeon_device *rdev)
133 {
134         if (rdev->kfd) {
135                 struct kgd2kfd_shared_resources gpu_resources = {
136                         .compute_vmid_bitmap = 0xFF00,
137
138                         .first_compute_pipe = 1,
139                         .compute_pipe_count = 8 - 1,
140                 };
141
142                 radeon_doorbell_get_kfd_info(rdev,
143                                 &gpu_resources.doorbell_physical_address,
144                                 &gpu_resources.doorbell_aperture_size,
145                                 &gpu_resources.doorbell_start_offset);
146
147                 kgd2kfd->device_init(rdev->kfd, &gpu_resources);
148         }
149 }
150
151 void radeon_kfd_device_fini(struct radeon_device *rdev)
152 {
153         if (rdev->kfd) {
154                 kgd2kfd->device_exit(rdev->kfd);
155                 rdev->kfd = NULL;
156         }
157 }
158
159 void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
160 {
161         if (rdev->kfd)
162                 kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
163 }
164
165 void radeon_kfd_suspend(struct radeon_device *rdev)
166 {
167         if (rdev->kfd)
168                 kgd2kfd->suspend(rdev->kfd);
169 }
170
171 int radeon_kfd_resume(struct radeon_device *rdev)
172 {
173         int r = 0;
174
175         if (rdev->kfd)
176                 r = kgd2kfd->resume(rdev->kfd);
177
178         return r;
179 }
180
181 static u32 pool_to_domain(enum kgd_memory_pool p)
182 {
183         switch (p) {
184         case KGD_POOL_FRAMEBUFFER: return RADEON_GEM_DOMAIN_VRAM;
185         default: return RADEON_GEM_DOMAIN_GTT;
186         }
187 }
188
189 static int init_sa_manager(struct kgd_dev *kgd, unsigned int size)
190 {
191         struct radeon_device *rdev = (struct radeon_device *)kgd;
192         int r;
193
194         BUG_ON(kgd == NULL);
195
196         r = radeon_sa_bo_manager_init(rdev, &rdev->kfd_bo,
197                                       size,
198                                       RADEON_GPU_PAGE_SIZE,
199                                       RADEON_GEM_DOMAIN_GTT,
200                                       RADEON_GEM_GTT_WC);
201
202         if (r)
203                 return r;
204
205         r = radeon_sa_bo_manager_start(rdev, &rdev->kfd_bo);
206         if (r)
207                 radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo);
208
209         return r;
210 }
211
212 static void fini_sa_manager(struct kgd_dev *kgd)
213 {
214         struct radeon_device *rdev = (struct radeon_device *)kgd;
215
216         BUG_ON(kgd == NULL);
217
218         radeon_sa_bo_manager_suspend(rdev, &rdev->kfd_bo);
219         radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo);
220 }
221
222 static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,
223                 enum kgd_memory_pool pool, struct kgd_mem **mem)
224 {
225         struct radeon_device *rdev = (struct radeon_device *)kgd;
226         u32 domain;
227         int r;
228
229         BUG_ON(kgd == NULL);
230
231         domain = pool_to_domain(pool);
232         if (domain != RADEON_GEM_DOMAIN_GTT) {
233                 dev_err(rdev->dev,
234                         "Only allowed to allocate gart memory for kfd\n");
235                 return -EINVAL;
236         }
237
238         *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
239         if ((*mem) == NULL)
240                 return -ENOMEM;
241
242         r = radeon_sa_bo_new(rdev, &rdev->kfd_bo, &(*mem)->sa_bo, size,
243                                 alignment);
244         if (r) {
245                 dev_err(rdev->dev, "failed to get memory for kfd (%d)\n", r);
246                 return r;
247         }
248
249         (*mem)->ptr = radeon_sa_bo_cpu_addr((*mem)->sa_bo);
250         (*mem)->gpu_addr = radeon_sa_bo_gpu_addr((*mem)->sa_bo);
251
252         return 0;
253 }
254
255 static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem)
256 {
257         struct radeon_device *rdev = (struct radeon_device *)kgd;
258
259         BUG_ON(kgd == NULL);
260
261         radeon_sa_bo_free(rdev, &mem->sa_bo, NULL);
262         kfree(mem);
263 }
264
265 static uint64_t get_vmem_size(struct kgd_dev *kgd)
266 {
267         struct radeon_device *rdev = (struct radeon_device *)kgd;
268
269         BUG_ON(kgd == NULL);
270
271         return rdev->mc.real_vram_size;
272 }
273
274 static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
275 {
276         struct radeon_device *rdev = (struct radeon_device *)kgd;
277
278         return rdev->asic->get_gpu_clock_counter(rdev);
279 }
280
281 static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
282 {
283         struct radeon_device *rdev = (struct radeon_device *)kgd;
284
285         /* The sclk is in quantas of 10kHz */
286         return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
287 }
288
/*
 * Convert the opaque kgd handle back to a radeon_device pointer. The handle
 * is the radeon_device pointer itself, cast in radeon_kfd_device_probe().
 */
static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd)
{
	struct radeon_device *rdev = (struct radeon_device *)kgd;

	return rdev;
}
293
294 static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value)
295 {
296         struct radeon_device *rdev = get_radeon_device(kgd);
297
298         writel(value, (void __iomem *)(rdev->rmmio + offset));
299 }
300
301 static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset)
302 {
303         struct radeon_device *rdev = get_radeon_device(kgd);
304
305         return readl((void __iomem *)(rdev->rmmio + offset));
306 }
307
308 static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
309                         uint32_t queue, uint32_t vmid)
310 {
311         struct radeon_device *rdev = get_radeon_device(kgd);
312         uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
313
314         mutex_lock(&rdev->srbm_mutex);
315         write_register(kgd, SRBM_GFX_CNTL, value);
316 }
317
318 static void unlock_srbm(struct kgd_dev *kgd)
319 {
320         struct radeon_device *rdev = get_radeon_device(kgd);
321
322         write_register(kgd, SRBM_GFX_CNTL, 0);
323         mutex_unlock(&rdev->srbm_mutex);
324 }
325
326 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
327                                 uint32_t queue_id)
328 {
329         uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
330         uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
331
332         lock_srbm(kgd, mec, pipe, queue_id, 0);
333 }
334
/*
 * Counterpart of acquire_queue(): clears the SRBM selection and releases the
 * SRBM mutex by calling unlock_srbm().
 */
static void release_queue(struct kgd_dev *kgd)
{
	unlock_srbm(kgd);
}
339
340 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
341                                         uint32_t sh_mem_config,
342                                         uint32_t sh_mem_ape1_base,
343                                         uint32_t sh_mem_ape1_limit,
344                                         uint32_t sh_mem_bases)
345 {
346         lock_srbm(kgd, 0, 0, 0, vmid);
347
348         write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
349         write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base);
350         write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit);
351         write_register(kgd, SH_MEM_BASES, sh_mem_bases);
352
353         unlock_srbm(kgd);
354 }
355
356 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
357                                         unsigned int vmid)
358 {
359         /*
360          * We have to assume that there is no outstanding mapping.
361          * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0
362          * because a mapping is in progress or because a mapping finished and
363          * the SW cleared it.
364          * So the protocol is to always wait & clear.
365          */
366         uint32_t pasid_mapping = (pasid == 0) ? 0 :
367                                 (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
368
369         write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
370                         pasid_mapping);
371
372         while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
373                                                                 (1U << vmid)))
374                 cpu_relax();
375         write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
376
377         return 0;
378 }
379
380 static int kgd_init_memory(struct kgd_dev *kgd)
381 {
382         /*
383          * Configure apertures:
384          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
385          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
386          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
387          */
388         int i;
389         uint32_t sh_mem_bases = PRIVATE_BASE(0x6000) | SHARED_BASE(0x6000);
390
391         for (i = 8; i < 16; i++) {
392                 uint32_t sh_mem_config;
393
394                 lock_srbm(kgd, 0, 0, 0, i);
395
396                 sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
397                 sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
398
399                 write_register(kgd, SH_MEM_CONFIG, sh_mem_config);
400
401                 write_register(kgd, SH_MEM_BASES, sh_mem_bases);
402
403                 /* Scratch aperture is not supported for now. */
404                 write_register(kgd, SH_STATIC_MEM_CONFIG, 0);
405
406                 /* APE1 disabled for now. */
407                 write_register(kgd, SH_MEM_APE1_BASE, 1);
408                 write_register(kgd, SH_MEM_APE1_LIMIT, 0);
409
410                 unlock_srbm(kgd);
411         }
412
413         return 0;
414 }
415
416 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
417                                 uint32_t hpd_size, uint64_t hpd_gpu_addr)
418 {
419         uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
420         uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
421
422         lock_srbm(kgd, mec, pipe, 0, 0);
423         write_register(kgd, CP_HPD_EOP_BASE_ADDR,
424                         lower_32_bits(hpd_gpu_addr >> 8));
425         write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
426                         upper_32_bits(hpd_gpu_addr >> 8));
427         write_register(kgd, CP_HPD_EOP_VMID, 0);
428         write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
429         unlock_srbm(kgd);
430
431         return 0;
432 }
433
/*
 * Interpret the untyped @mqd pointer received through the kfd interface as a
 * struct cik_mqd.
 */
static inline struct cik_mqd *get_mqd(void *mqd)
{
	struct cik_mqd *m = mqd;

	return m;
}
438
439 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
440                         uint32_t queue_id, uint32_t __user *wptr)
441 {
442         uint32_t wptr_shadow, is_wptr_shadow_valid;
443         struct cik_mqd *m;
444
445         m = get_mqd(mqd);
446
447         is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
448
449         acquire_queue(kgd, pipe_id, queue_id);
450         write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
451         write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
452         write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control);
453
454         write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
455         write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
456         write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
457
458         write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
459         write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
460         write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
461
462         write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
463
464         write_register(kgd, CP_HQD_PERSISTENT_STATE,
465                         m->cp_hqd_persistent_state);
466         write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
467         write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
468
469         write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO,
470                         m->cp_hqd_atomic0_preop_lo);
471
472         write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI,
473                         m->cp_hqd_atomic0_preop_hi);
474
475         write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO,
476                         m->cp_hqd_atomic1_preop_lo);
477
478         write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI,
479                         m->cp_hqd_atomic1_preop_hi);
480
481         write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR,
482                         m->cp_hqd_pq_rptr_report_addr_lo);
483
484         write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
485                         m->cp_hqd_pq_rptr_report_addr_hi);
486
487         write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
488
489         write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR,
490                         m->cp_hqd_pq_wptr_poll_addr_lo);
491
492         write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI,
493                         m->cp_hqd_pq_wptr_poll_addr_hi);
494
495         write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL,
496                         m->cp_hqd_pq_doorbell_control);
497
498         write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid);
499
500         write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum);
501
502         write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
503         write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
504
505         write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
506
507         if (is_wptr_shadow_valid)
508                 write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow);
509
510         write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active);
511         release_queue(kgd);
512
513         return 0;
514 }
515
516 static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address,
517                                 uint32_t pipe_id, uint32_t queue_id)
518 {
519         uint32_t act;
520         bool retval = false;
521         uint32_t low, high;
522
523         acquire_queue(kgd, pipe_id, queue_id);
524         act = read_register(kgd, CP_HQD_ACTIVE);
525         if (act) {
526                 low = lower_32_bits(queue_address >> 8);
527                 high = upper_32_bits(queue_address >> 8);
528
529                 if (low == read_register(kgd, CP_HQD_PQ_BASE) &&
530                                 high == read_register(kgd, CP_HQD_PQ_BASE_HI))
531                         retval = true;
532         }
533         release_queue(kgd);
534         return retval;
535 }
536
537 static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
538                                 unsigned int timeout, uint32_t pipe_id,
539                                 uint32_t queue_id)
540 {
541         uint32_t temp;
542
543         acquire_queue(kgd, pipe_id, queue_id);
544         write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0);
545
546         write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type);
547
548         while (true) {
549                 temp = read_register(kgd, CP_HQD_ACTIVE);
550                 if (temp & 0x1)
551                         break;
552                 if (timeout == 0) {
553                         pr_err("kfd: cp queue preemption time out (%dms)\n",
554                                 temp);
555                         return -ETIME;
556                 }
557                 msleep(20);
558                 timeout -= 20;
559         }
560
561         release_queue(kgd);
562         return 0;
563 }