sparc64: Enable PCI IOMMU version 2 API
[cascardo/linux.git] / arch / sparc / kernel / pci_sun4v.c
1 /* pci_sun4v.c: SUN4V specific PCI controller support.
2  *
3  * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
4  */
5
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/pci.h>
9 #include <linux/init.h>
10 #include <linux/slab.h>
11 #include <linux/interrupt.h>
12 #include <linux/percpu.h>
13 #include <linux/irq.h>
14 #include <linux/msi.h>
15 #include <linux/export.h>
16 #include <linux/log2.h>
17 #include <linux/of_device.h>
18 #include <linux/iommu-common.h>
19
20 #include <asm/iommu.h>
21 #include <asm/irq.h>
22 #include <asm/hypervisor.h>
23 #include <asm/prom.h>
24
25 #include "pci_impl.h"
26 #include "iommu_common.h"
27
28 #include "pci_sun4v.h"
29
30 #define DRIVER_NAME     "pci_sun4v"
31 #define PFX             DRIVER_NAME ": "
32
/* Major and minor number of the VPCI API version recorded by this driver. */
static unsigned long vpci_major;
static unsigned long vpci_minor;

/* A single (major, minor) VPCI API version pair. */
struct vpci_version {
	unsigned long major;
	unsigned long minor;
};

/* Known versions; keep this table sorted from highest major to lowest. */
static struct vpci_version vpci_versions[] = {
	{ .major = 2, .minor = 0 },
	{ .major = 1, .minor = 1 },
};
46
47 #define PGLIST_NENTS    (PAGE_SIZE / sizeof(u64))
48
49 struct iommu_batch {
50         struct device   *dev;           /* Device mapping is for.       */
51         unsigned long   prot;           /* IOMMU page protections       */
52         unsigned long   entry;          /* Index into IOTSB.            */
53         u64             *pglist;        /* List of physical pages       */
54         unsigned long   npages;         /* Number of pages in list.     */
55 };
56
57 static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
58 static int iommu_batch_initialized;
59
60 /* Interrupts must be disabled.  */
61 static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
62 {
63         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
64
65         p->dev          = dev;
66         p->prot         = prot;
67         p->entry        = entry;
68         p->npages       = 0;
69 }
70
71 /* Interrupts must be disabled.  */
72 static long iommu_batch_flush(struct iommu_batch *p)
73 {
74         struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
75         unsigned long devhandle = pbm->devhandle;
76         unsigned long prot = p->prot;
77         unsigned long entry = p->entry;
78         u64 *pglist = p->pglist;
79         unsigned long npages = p->npages;
80
81         while (npages != 0) {
82                 long num;
83
84                 num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
85                                           npages, prot, __pa(pglist));
86                 if (unlikely(num < 0)) {
87                         if (printk_ratelimit())
88                                 printk("iommu_batch_flush: IOMMU map of "
89                                        "[%08lx:%08llx:%lx:%lx:%lx] failed with "
90                                        "status %ld\n",
91                                        devhandle, HV_PCI_TSBID(0, entry),
92                                        npages, prot, __pa(pglist), num);
93                         return -1;
94                 }
95
96                 entry += num;
97                 npages -= num;
98                 pglist += num;
99         }
100
101         p->entry = entry;
102         p->npages = 0;
103
104         return 0;
105 }
106
107 static inline void iommu_batch_new_entry(unsigned long entry)
108 {
109         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
110
111         if (p->entry + p->npages == entry)
112                 return;
113         if (p->entry != ~0UL)
114                 iommu_batch_flush(p);
115         p->entry = entry;
116 }
117
118 /* Interrupts must be disabled.  */
119 static inline long iommu_batch_add(u64 phys_page)
120 {
121         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
122
123         BUG_ON(p->npages >= PGLIST_NENTS);
124
125         p->pglist[p->npages++] = phys_page;
126         if (p->npages == PGLIST_NENTS)
127                 return iommu_batch_flush(p);
128
129         return 0;
130 }
131
132 /* Interrupts must be disabled.  */
133 static inline long iommu_batch_end(void)
134 {
135         struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
136
137         BUG_ON(p->npages >= PGLIST_NENTS);
138
139         return iommu_batch_flush(p);
140 }
141
142 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
143                                    dma_addr_t *dma_addrp, gfp_t gfp,
144                                    unsigned long attrs)
145 {
146         unsigned long flags, order, first_page, npages, n;
147         struct iommu *iommu;
148         struct page *page;
149         void *ret;
150         long entry;
151         int nid;
152
153         size = IO_PAGE_ALIGN(size);
154         order = get_order(size);
155         if (unlikely(order >= MAX_ORDER))
156                 return NULL;
157
158         npages = size >> IO_PAGE_SHIFT;
159
160         nid = dev->archdata.numa_node;
161         page = alloc_pages_node(nid, gfp, order);
162         if (unlikely(!page))
163                 return NULL;
164
165         first_page = (unsigned long) page_address(page);
166         memset((char *)first_page, 0, PAGE_SIZE << order);
167
168         iommu = dev->archdata.iommu;
169
170         entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
171                                       (unsigned long)(-1), 0);
172
173         if (unlikely(entry == IOMMU_ERROR_CODE))
174                 goto range_alloc_fail;
175
176         *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
177         ret = (void *) first_page;
178         first_page = __pa(first_page);
179
180         local_irq_save(flags);
181
182         iommu_batch_start(dev,
183                           (HV_PCI_MAP_ATTR_READ |
184                            HV_PCI_MAP_ATTR_WRITE),
185                           entry);
186
187         for (n = 0; n < npages; n++) {
188                 long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
189                 if (unlikely(err < 0L))
190                         goto iommu_map_fail;
191         }
192
193         if (unlikely(iommu_batch_end() < 0L))
194                 goto iommu_map_fail;
195
196         local_irq_restore(flags);
197
198         return ret;
199
200 iommu_map_fail:
201         iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
202
203 range_alloc_fail:
204         free_pages(first_page, order);
205         return NULL;
206 }
207
208 static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
209                                unsigned long npages)
210 {
211         u32 devhandle = *(u32 *)demap_arg;
212         unsigned long num, flags;
213
214         local_irq_save(flags);
215         do {
216                 num = pci_sun4v_iommu_demap(devhandle,
217                                             HV_PCI_TSBID(0, entry),
218                                             npages);
219
220                 entry += num;
221                 npages -= num;
222         } while (npages != 0);
223         local_irq_restore(flags);
224 }
225
226 static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
227                                  dma_addr_t dvma, unsigned long attrs)
228 {
229         struct pci_pbm_info *pbm;
230         struct iommu *iommu;
231         unsigned long order, npages, entry;
232         u32 devhandle;
233
234         npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
235         iommu = dev->archdata.iommu;
236         pbm = dev->archdata.host_controller;
237         devhandle = pbm->devhandle;
238         entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
239         dma_4v_iommu_demap(&devhandle, entry, npages);
240         iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
241         order = get_order(size);
242         if (order < 10)
243                 free_pages((unsigned long)cpu, order);
244 }
245
246 static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
247                                   unsigned long offset, size_t sz,
248                                   enum dma_data_direction direction,
249                                   unsigned long attrs)
250 {
251         struct iommu *iommu;
252         unsigned long flags, npages, oaddr;
253         unsigned long i, base_paddr;
254         u32 bus_addr, ret;
255         unsigned long prot;
256         long entry;
257
258         iommu = dev->archdata.iommu;
259
260         if (unlikely(direction == DMA_NONE))
261                 goto bad;
262
263         oaddr = (unsigned long)(page_address(page) + offset);
264         npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
265         npages >>= IO_PAGE_SHIFT;
266
267         entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
268                                       (unsigned long)(-1), 0);
269
270         if (unlikely(entry == IOMMU_ERROR_CODE))
271                 goto bad;
272
273         bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
274         ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
275         base_paddr = __pa(oaddr & IO_PAGE_MASK);
276         prot = HV_PCI_MAP_ATTR_READ;
277         if (direction != DMA_TO_DEVICE)
278                 prot |= HV_PCI_MAP_ATTR_WRITE;
279
280         local_irq_save(flags);
281
282         iommu_batch_start(dev, prot, entry);
283
284         for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
285                 long err = iommu_batch_add(base_paddr);
286                 if (unlikely(err < 0L))
287                         goto iommu_map_fail;
288         }
289         if (unlikely(iommu_batch_end() < 0L))
290                 goto iommu_map_fail;
291
292         local_irq_restore(flags);
293
294         return ret;
295
296 bad:
297         if (printk_ratelimit())
298                 WARN_ON(1);
299         return DMA_ERROR_CODE;
300
301 iommu_map_fail:
302         iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
303         return DMA_ERROR_CODE;
304 }
305
306 static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
307                               size_t sz, enum dma_data_direction direction,
308                               unsigned long attrs)
309 {
310         struct pci_pbm_info *pbm;
311         struct iommu *iommu;
312         unsigned long npages;
313         long entry;
314         u32 devhandle;
315
316         if (unlikely(direction == DMA_NONE)) {
317                 if (printk_ratelimit())
318                         WARN_ON(1);
319                 return;
320         }
321
322         iommu = dev->archdata.iommu;
323         pbm = dev->archdata.host_controller;
324         devhandle = pbm->devhandle;
325
326         npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
327         npages >>= IO_PAGE_SHIFT;
328         bus_addr &= IO_PAGE_MASK;
329         entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
330         dma_4v_iommu_demap(&devhandle, entry, npages);
331         iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
332 }
333
334 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
335                          int nelems, enum dma_data_direction direction,
336                          unsigned long attrs)
337 {
338         struct scatterlist *s, *outs, *segstart;
339         unsigned long flags, handle, prot;
340         dma_addr_t dma_next = 0, dma_addr;
341         unsigned int max_seg_size;
342         unsigned long seg_boundary_size;
343         int outcount, incount, i;
344         struct iommu *iommu;
345         unsigned long base_shift;
346         long err;
347
348         BUG_ON(direction == DMA_NONE);
349
350         iommu = dev->archdata.iommu;
351         if (nelems == 0 || !iommu)
352                 return 0;
353         
354         prot = HV_PCI_MAP_ATTR_READ;
355         if (direction != DMA_TO_DEVICE)
356                 prot |= HV_PCI_MAP_ATTR_WRITE;
357
358         outs = s = segstart = &sglist[0];
359         outcount = 1;
360         incount = nelems;
361         handle = 0;
362
363         /* Init first segment length for backout at failure */
364         outs->dma_length = 0;
365
366         local_irq_save(flags);
367
368         iommu_batch_start(dev, prot, ~0UL);
369
370         max_seg_size = dma_get_max_seg_size(dev);
371         seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
372                                   IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
373         base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
374         for_each_sg(sglist, s, nelems, i) {
375                 unsigned long paddr, npages, entry, out_entry = 0, slen;
376
377                 slen = s->length;
378                 /* Sanity check */
379                 if (slen == 0) {
380                         dma_next = 0;
381                         continue;
382                 }
383                 /* Allocate iommu entries for that segment */
384                 paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
385                 npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
386                 entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
387                                               &handle, (unsigned long)(-1), 0);
388
389                 /* Handle failure */
390                 if (unlikely(entry == IOMMU_ERROR_CODE)) {
391                         if (printk_ratelimit())
392                                 printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
393                                        " npages %lx\n", iommu, paddr, npages);
394                         goto iommu_map_failed;
395                 }
396
397                 iommu_batch_new_entry(entry);
398
399                 /* Convert entry to a dma_addr_t */
400                 dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
401                 dma_addr |= (s->offset & ~IO_PAGE_MASK);
402
403                 /* Insert into HW table */
404                 paddr &= IO_PAGE_MASK;
405                 while (npages--) {
406                         err = iommu_batch_add(paddr);
407                         if (unlikely(err < 0L))
408                                 goto iommu_map_failed;
409                         paddr += IO_PAGE_SIZE;
410                 }
411
412                 /* If we are in an open segment, try merging */
413                 if (segstart != s) {
414                         /* We cannot merge if:
415                          * - allocated dma_addr isn't contiguous to previous allocation
416                          */
417                         if ((dma_addr != dma_next) ||
418                             (outs->dma_length + s->length > max_seg_size) ||
419                             (is_span_boundary(out_entry, base_shift,
420                                               seg_boundary_size, outs, s))) {
421                                 /* Can't merge: create a new segment */
422                                 segstart = s;
423                                 outcount++;
424                                 outs = sg_next(outs);
425                         } else {
426                                 outs->dma_length += s->length;
427                         }
428                 }
429
430                 if (segstart == s) {
431                         /* This is a new segment, fill entries */
432                         outs->dma_address = dma_addr;
433                         outs->dma_length = slen;
434                         out_entry = entry;
435                 }
436
437                 /* Calculate next page pointer for contiguous check */
438                 dma_next = dma_addr + slen;
439         }
440
441         err = iommu_batch_end();
442
443         if (unlikely(err < 0L))
444                 goto iommu_map_failed;
445
446         local_irq_restore(flags);
447
448         if (outcount < incount) {
449                 outs = sg_next(outs);
450                 outs->dma_address = DMA_ERROR_CODE;
451                 outs->dma_length = 0;
452         }
453
454         return outcount;
455
456 iommu_map_failed:
457         for_each_sg(sglist, s, nelems, i) {
458                 if (s->dma_length != 0) {
459                         unsigned long vaddr, npages;
460
461                         vaddr = s->dma_address & IO_PAGE_MASK;
462                         npages = iommu_num_pages(s->dma_address, s->dma_length,
463                                                  IO_PAGE_SIZE);
464                         iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
465                                              IOMMU_ERROR_CODE);
466                         /* XXX demap? XXX */
467                         s->dma_address = DMA_ERROR_CODE;
468                         s->dma_length = 0;
469                 }
470                 if (s == outs)
471                         break;
472         }
473         local_irq_restore(flags);
474
475         return 0;
476 }
477
478 static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
479                             int nelems, enum dma_data_direction direction,
480                             unsigned long attrs)
481 {
482         struct pci_pbm_info *pbm;
483         struct scatterlist *sg;
484         struct iommu *iommu;
485         unsigned long flags, entry;
486         u32 devhandle;
487
488         BUG_ON(direction == DMA_NONE);
489
490         iommu = dev->archdata.iommu;
491         pbm = dev->archdata.host_controller;
492         devhandle = pbm->devhandle;
493         
494         local_irq_save(flags);
495
496         sg = sglist;
497         while (nelems--) {
498                 dma_addr_t dma_handle = sg->dma_address;
499                 unsigned int len = sg->dma_length;
500                 unsigned long npages;
501                 struct iommu_map_table *tbl = &iommu->tbl;
502                 unsigned long shift = IO_PAGE_SHIFT;
503
504                 if (!len)
505                         break;
506                 npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
507                 entry = ((dma_handle - tbl->table_map_base) >> shift);
508                 dma_4v_iommu_demap(&devhandle, entry, npages);
509                 iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
510                                      IOMMU_ERROR_CODE);
511                 sg = sg_next(sg);
512         }
513
514         local_irq_restore(flags);
515 }
516
517 static struct dma_map_ops sun4v_dma_ops = {
518         .alloc                          = dma_4v_alloc_coherent,
519         .free                           = dma_4v_free_coherent,
520         .map_page                       = dma_4v_map_page,
521         .unmap_page                     = dma_4v_unmap_page,
522         .map_sg                         = dma_4v_map_sg,
523         .unmap_sg                       = dma_4v_unmap_sg,
524 };
525
526 static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
527 {
528         struct property *prop;
529         struct device_node *dp;
530
531         dp = pbm->op->dev.of_node;
532         prop = of_find_property(dp, "66mhz-capable", NULL);
533         pbm->is_66mhz_capable = (prop != NULL);
534         pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
535
536         /* XXX register error interrupt handlers XXX */
537 }
538
539 static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
540                                             struct iommu_map_table *iommu)
541 {
542         struct iommu_pool *pool;
543         unsigned long i, pool_nr, cnt = 0;
544         u32 devhandle;
545
546         devhandle = pbm->devhandle;
547         for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
548                 pool = &(iommu->pools[pool_nr]);
549                 for (i = pool->start; i <= pool->end; i++) {
550                         unsigned long ret, io_attrs, ra;
551
552                         ret = pci_sun4v_iommu_getmap(devhandle,
553                                                      HV_PCI_TSBID(0, i),
554                                                      &io_attrs, &ra);
555                         if (ret == HV_EOK) {
556                                 if (page_in_phys_avail(ra)) {
557                                         pci_sun4v_iommu_demap(devhandle,
558                                                               HV_PCI_TSBID(0,
559                                                               i), 1);
560                                 } else {
561                                         cnt++;
562                                         __set_bit(i, iommu->map);
563                                 }
564                         }
565                 }
566         }
567         return cnt;
568 }
569
570 static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
571 {
572         static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
573         struct iommu *iommu = pbm->iommu;
574         unsigned long num_tsb_entries, sz;
575         u32 dma_mask, dma_offset;
576         const u32 *vdma;
577
578         vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
579         if (!vdma)
580                 vdma = vdma_default;
581
582         if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
583                 printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
584                        vdma[0], vdma[1]);
585                 return -EINVAL;
586         }
587
588         dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
589         num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
590
591         dma_offset = vdma[0];
592
593         /* Setup initial software IOMMU state. */
594         spin_lock_init(&iommu->lock);
595         iommu->ctx_lowest_free = 1;
596         iommu->tbl.table_map_base = dma_offset;
597         iommu->dma_addr_mask = dma_mask;
598
599         /* Allocate and initialize the free area map.  */
600         sz = (num_tsb_entries + 7) / 8;
601         sz = (sz + 7UL) & ~7UL;
602         iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
603         if (!iommu->tbl.map) {
604                 printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
605                 return -ENOMEM;
606         }
607         iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
608                             NULL, false /* no large_pool */,
609                             0 /* default npools */,
610                             false /* want span boundary checking */);
611         sz = probe_existing_entries(pbm, &iommu->tbl);
612         if (sz)
613                 printk("%s: Imported %lu TSB entries from OBP\n",
614                        pbm->name, sz);
615
616         return 0;
617 }
618
619 #ifdef CONFIG_PCI_MSI
620 struct pci_sun4v_msiq_entry {
621         u64             version_type;
622 #define MSIQ_VERSION_MASK               0xffffffff00000000UL
623 #define MSIQ_VERSION_SHIFT              32
624 #define MSIQ_TYPE_MASK                  0x00000000000000ffUL
625 #define MSIQ_TYPE_SHIFT                 0
626 #define MSIQ_TYPE_NONE                  0x00
627 #define MSIQ_TYPE_MSG                   0x01
628 #define MSIQ_TYPE_MSI32                 0x02
629 #define MSIQ_TYPE_MSI64                 0x03
630 #define MSIQ_TYPE_INTX                  0x08
631 #define MSIQ_TYPE_NONE2                 0xff
632
633         u64             intx_sysino;
634         u64             reserved1;
635         u64             stick;
636         u64             req_id;  /* bus/device/func */
637 #define MSIQ_REQID_BUS_MASK             0xff00UL
638 #define MSIQ_REQID_BUS_SHIFT            8
639 #define MSIQ_REQID_DEVICE_MASK          0x00f8UL
640 #define MSIQ_REQID_DEVICE_SHIFT         3
641 #define MSIQ_REQID_FUNC_MASK            0x0007UL
642 #define MSIQ_REQID_FUNC_SHIFT           0
643
644         u64             msi_address;
645
646         /* The format of this value is message type dependent.
647          * For MSI bits 15:0 are the data from the MSI packet.
648          * For MSI-X bits 31:0 are the data from the MSI packet.
649          * For MSG, the message code and message routing code where:
650          *      bits 39:32 is the bus/device/fn of the msg target-id
651          *      bits 18:16 is the message routing code
652          *      bits 7:0 is the message code
653          * For INTx the low order 2-bits are:
654          *      00 - INTA
655          *      01 - INTB
656          *      10 - INTC
657          *      11 - INTD
658          */
659         u64             msi_data;
660
661         u64             reserved2;
662 };
663
664 static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
665                               unsigned long *head)
666 {
667         unsigned long err, limit;
668
669         err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
670         if (unlikely(err))
671                 return -ENXIO;
672
673         limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
674         if (unlikely(*head >= limit))
675                 return -EFBIG;
676
677         return 0;
678 }
679
680 static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
681                                  unsigned long msiqid, unsigned long *head,
682                                  unsigned long *msi)
683 {
684         struct pci_sun4v_msiq_entry *ep;
685         unsigned long err, type;
686
687         /* Note: void pointer arithmetic, 'head' is a byte offset  */
688         ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
689                                  (pbm->msiq_ent_count *
690                                   sizeof(struct pci_sun4v_msiq_entry))) +
691               *head);
692
693         if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
694                 return 0;
695
696         type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
697         if (unlikely(type != MSIQ_TYPE_MSI32 &&
698                      type != MSIQ_TYPE_MSI64))
699                 return -EINVAL;
700
701         *msi = ep->msi_data;
702
703         err = pci_sun4v_msi_setstate(pbm->devhandle,
704                                      ep->msi_data /* msi_num */,
705                                      HV_MSISTATE_IDLE);
706         if (unlikely(err))
707                 return -ENXIO;
708
709         /* Clear the entry.  */
710         ep->version_type &= ~MSIQ_TYPE_MASK;
711
712         (*head) += sizeof(struct pci_sun4v_msiq_entry);
713         if (*head >=
714             (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
715                 *head = 0;
716
717         return 1;
718 }
719
720 static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
721                               unsigned long head)
722 {
723         unsigned long err;
724
725         err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
726         if (unlikely(err))
727                 return -EINVAL;
728
729         return 0;
730 }
731
732 static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
733                                unsigned long msi, int is_msi64)
734 {
735         if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
736                                   (is_msi64 ?
737                                    HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
738                 return -ENXIO;
739         if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
740                 return -ENXIO;
741         if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
742                 return -ENXIO;
743         return 0;
744 }
745
746 static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
747 {
748         unsigned long err, msiqid;
749
750         err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
751         if (err)
752                 return -ENXIO;
753
754         pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);
755
756         return 0;
757 }
758
759 static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
760 {
761         unsigned long q_size, alloc_size, pages, order;
762         int i;
763
764         q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
765         alloc_size = (pbm->msiq_num * q_size);
766         order = get_order(alloc_size);
767         pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
768         if (pages == 0UL) {
769                 printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
770                        order);
771                 return -ENOMEM;
772         }
773         memset((char *)pages, 0, PAGE_SIZE << order);
774         pbm->msi_queues = (void *) pages;
775
776         for (i = 0; i < pbm->msiq_num; i++) {
777                 unsigned long err, base = __pa(pages + (i * q_size));
778                 unsigned long ret1, ret2;
779
780                 err = pci_sun4v_msiq_conf(pbm->devhandle,
781                                           pbm->msiq_first + i,
782                                           base, pbm->msiq_ent_count);
783                 if (err) {
784                         printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
785                                err);
786                         goto h_error;
787                 }
788
789                 err = pci_sun4v_msiq_info(pbm->devhandle,
790                                           pbm->msiq_first + i,
791                                           &ret1, &ret2);
792                 if (err) {
793                         printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
794                                err);
795                         goto h_error;
796                 }
797                 if (ret1 != base || ret2 != pbm->msiq_ent_count) {
798                         printk(KERN_ERR "MSI: Bogus qconf "
799                                "expected[%lx:%x] got[%lx:%lx]\n",
800                                base, pbm->msiq_ent_count,
801                                ret1, ret2);
802                         goto h_error;
803                 }
804         }
805
806         return 0;
807
808 h_error:
809         free_pages(pages, order);
810         return -EINVAL;
811 }
812
813 static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
814 {
815         unsigned long q_size, alloc_size, pages, order;
816         int i;
817
818         for (i = 0; i < pbm->msiq_num; i++) {
819                 unsigned long msiqid = pbm->msiq_first + i;
820
821                 (void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
822         }
823
824         q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
825         alloc_size = (pbm->msiq_num * q_size);
826         order = get_order(alloc_size);
827
828         pages = (unsigned long) pbm->msi_queues;
829
830         free_pages(pages, order);
831
832         pbm->msi_queues = NULL;
833 }
834
835 static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
836                                     unsigned long msiqid,
837                                     unsigned long devino)
838 {
839         unsigned int irq = sun4v_build_irq(pbm->devhandle, devino);
840
841         if (!irq)
842                 return -ENOMEM;
843
844         if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
845                 return -EINVAL;
846         if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
847                 return -EINVAL;
848
849         return irq;
850 }
851
852 static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
853         .get_head       =       pci_sun4v_get_head,
854         .dequeue_msi    =       pci_sun4v_dequeue_msi,
855         .set_head       =       pci_sun4v_set_head,
856         .msi_setup      =       pci_sun4v_msi_setup,
857         .msi_teardown   =       pci_sun4v_msi_teardown,
858         .msiq_alloc     =       pci_sun4v_msiq_alloc,
859         .msiq_free      =       pci_sun4v_msiq_free,
860         .msiq_build_irq =       pci_sun4v_msiq_build_irq,
861 };
862
863 static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
864 {
865         sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
866 }
867 #else /* CONFIG_PCI_MSI */
/* Variant built when CONFIG_PCI_MSI is not set: intentionally empty so
 * callers need no conditional compilation of their own.
 */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	/* Nothing to initialise. */
}
871 #endif /* !(CONFIG_PCI_MSI) */
872
873 static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
874                               struct platform_device *op, u32 devhandle)
875 {
876         struct device_node *dp = op->dev.of_node;
877         int err;
878
879         pbm->numa_node = of_node_to_nid(dp);
880
881         pbm->pci_ops = &sun4v_pci_ops;
882         pbm->config_space_reg_bits = 12;
883
884         pbm->index = pci_num_pbms++;
885
886         pbm->op = op;
887
888         pbm->devhandle = devhandle;
889
890         pbm->name = dp->full_name;
891
892         printk("%s: SUN4V PCI Bus Module\n", pbm->name);
893         printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
894
895         pci_determine_mem_io_space(pbm);
896
897         pci_get_pbm_props(pbm);
898
899         err = pci_sun4v_iommu_init(pbm);
900         if (err)
901                 return err;
902
903         pci_sun4v_msi_init(pbm);
904
905         pci_sun4v_scan_bus(pbm, &op->dev);
906
907         pbm->next = pci_pbm_root;
908         pci_pbm_root = pbm;
909
910         return 0;
911 }
912
913 static int pci_sun4v_probe(struct platform_device *op)
914 {
915         const struct linux_prom64_registers *regs;
916         static int hvapi_negotiated = 0;
917         struct pci_pbm_info *pbm;
918         struct device_node *dp;
919         struct iommu *iommu;
920         u32 devhandle;
921         int i, err = -ENODEV;
922
923         dp = op->dev.of_node;
924
925         if (!hvapi_negotiated++) {
926                 for (i = 0; i < ARRAY_SIZE(vpci_versions); i++) {
927                         vpci_major = vpci_versions[i].major;
928                         vpci_minor = vpci_versions[i].minor;
929
930                         err = sun4v_hvapi_register(HV_GRP_PCI, vpci_major,
931                                                    &vpci_minor);
932                         if (!err)
933                                 break;
934                 }
935
936                 if (err) {
937                         pr_err(PFX "Could not register hvapi, err=%d\n", err);
938                         return err;
939                 }
940                 pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
941                         vpci_major, vpci_minor);
942
943                 dma_ops = &sun4v_dma_ops;
944         }
945
946         regs = of_get_property(dp, "reg", NULL);
947         err = -ENODEV;
948         if (!regs) {
949                 printk(KERN_ERR PFX "Could not find config registers\n");
950                 goto out_err;
951         }
952         devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;
953
954         err = -ENOMEM;
955         if (!iommu_batch_initialized) {
956                 for_each_possible_cpu(i) {
957                         unsigned long page = get_zeroed_page(GFP_KERNEL);
958
959                         if (!page)
960                                 goto out_err;
961
962                         per_cpu(iommu_batch, i).pglist = (u64 *) page;
963                 }
964                 iommu_batch_initialized = 1;
965         }
966
967         pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
968         if (!pbm) {
969                 printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
970                 goto out_err;
971         }
972
973         iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
974         if (!iommu) {
975                 printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
976                 goto out_free_controller;
977         }
978
979         pbm->iommu = iommu;
980
981         err = pci_sun4v_pbm_init(pbm, op, devhandle);
982         if (err)
983                 goto out_free_iommu;
984
985         dev_set_drvdata(&op->dev, pbm);
986
987         return 0;
988
989 out_free_iommu:
990         kfree(pbm->iommu);
991
992 out_free_controller:
993         kfree(pbm);
994
995 out_err:
996         return err;
997 }
998
999 static const struct of_device_id pci_sun4v_match[] = {
1000         {
1001                 .name = "pci",
1002                 .compatible = "SUNW,sun4v-pci",
1003         },
1004         {},
1005 };
1006
1007 static struct platform_driver pci_sun4v_driver = {
1008         .driver = {
1009                 .name = DRIVER_NAME,
1010                 .of_match_table = pci_sun4v_match,
1011         },
1012         .probe          = pci_sun4v_probe,
1013 };
1014
1015 static int __init pci_sun4v_init(void)
1016 {
1017         return platform_driver_register(&pci_sun4v_driver);
1018 }
1019
1020 subsys_initcall(pci_sun4v_init);