tile PCI RC: make default consistent DMA mask 32-bit
arch/tile/kernel/pci-dma.c
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <asm/tlbflush.h>
#include <asm/homecache.h>

/* Generic DMA mapping functions: */

/*
 * Allocate what Linux calls "coherent" memory.  On TILEPro this is
 * uncached memory; on TILE-Gx it is hash-for-home memory.
 */
#ifdef __tilepro__
#define PAGE_HOME_DMA PAGE_HOME_UNCACHED
#else
#define PAGE_HOME_DMA PAGE_HOME_HASH
#endif

static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
                                     dma_addr_t *dma_handle, gfp_t gfp,
                                     struct dma_attrs *attrs)
{
        u64 dma_mask = (dev && dev->coherent_dma_mask) ?
                dev->coherent_dma_mask : DMA_BIT_MASK(32);
        int node = dev ? dev_to_node(dev) : 0;
        int order = get_order(size);
        struct page *pg;
        dma_addr_t addr;

        gfp |= __GFP_ZERO;

        /*
         * If the mask specifies that the memory be in the first 4 GB, then
         * we force the allocation to come from the DMA zone.  We also
         * force the node to 0 since that's the only node where the DMA
         * zone isn't empty.  If the mask size is smaller than 32 bits, we
         * may still not be able to guarantee a suitable memory address, in
         * which case we will return NULL.  But such devices are uncommon.
         */
        if (dma_mask <= DMA_BIT_MASK(32)) {
                gfp |= GFP_DMA;
                node = 0;
        }

        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;

        addr = page_to_phys(pg);
        if (addr + size > dma_mask) {
                __homecache_free_pages(pg, order);
                return NULL;
        }

        *dma_handle = addr;

        return page_address(pg);
}

/*
 * Free memory that was allocated with tile_dma_alloc_coherent.
 */
static void tile_dma_free_coherent(struct device *dev, size_t size,
                                   void *vaddr, dma_addr_t dma_handle,
                                   struct dma_attrs *attrs)
{
        homecache_free_pages((unsigned long)vaddr, get_order(size));
}

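/*
 * For illustration only: a driver reaches the allocator above through the
 * generic DMA API rather than by calling it directly.  The names "pdev",
 * "RING_BYTES" and "ring_dma" below are hypothetical driver-local names,
 * not part of this file.
 *
 *        void *ring = dma_alloc_coherent(&pdev->dev, RING_BYTES,
 *                                        &ring_dma, GFP_KERNEL);
 *        if (!ring)
 *                return -ENOMEM;
 *        ...
 *        dma_free_coherent(&pdev->dev, RING_BYTES, ring, ring_dma);
 */
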
/*
 * The map routines "map" the specified address range for DMA
 * accesses.  The memory belongs to the device after this call is
 * issued, until it is unmapped with dma_unmap_single.
 *
 * We don't need to do any mapping, we just flush the address range
 * out of the cache and return a DMA address.
 *
 * The unmap routines do whatever is necessary before the processor
 * accesses the memory again, and must be called before the driver
 * touches the memory.  We can get away with a cache invalidate if we
 * can count on nothing having been touched.
 */

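/*
 * Illustrative sketch of the streaming usage these routines back: a driver
 * maps a buffer, lets the device DMA to or from it, and unmaps it before
 * touching the data again.  "pdev", "buf" and "len" are hypothetical
 * driver-local names.
 *
 *        dma_addr_t bus = dma_map_single(&pdev->dev, buf, len,
 *                                        DMA_FROM_DEVICE);
 *        if (dma_mapping_error(&pdev->dev, bus))
 *                return -ENOMEM;
 *        ... tell the device to DMA into "bus" and wait for completion ...
 *        dma_unmap_single(&pdev->dev, bus, len, DMA_FROM_DEVICE);
 *        ... only now is it safe for the CPU to read "buf" ...
 */
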
/* Set up a single page for DMA access. */
static void __dma_prep_page(struct page *page, unsigned long offset,
                            size_t size, enum dma_data_direction direction)
{
        /*
         * Flush the page from cache if necessary.
         * On tilegx, data is delivered to hash-for-home L3; on tilepro,
         * data is delivered direct to memory.
         *
         * NOTE: If we were just doing DMA_TO_DEVICE we could optimize
         * this to be a "flush" not a "finv" and keep some of the
         * state in cache across the DMA operation, but it doesn't seem
         * worth creating the necessary flush_buffer_xxx() infrastructure.
         */
        int home = page_home(page);
        switch (home) {
        case PAGE_HOME_HASH:
#ifdef __tilegx__
                return;
#endif
                break;
        case PAGE_HOME_UNCACHED:
#ifdef __tilepro__
                return;
#endif
                break;
        case PAGE_HOME_IMMUTABLE:
                /* Should be going to the device only. */
                BUG_ON(direction == DMA_FROM_DEVICE ||
                       direction == DMA_BIDIRECTIONAL);
                return;
        case PAGE_HOME_INCOHERENT:
                /* Incoherent anyway, so no need to work hard here. */
                return;
        default:
                BUG_ON(home < 0 || home >= NR_CPUS);
                break;
        }
        homecache_finv_page(page);

#ifdef DEBUG_ALIGNMENT
        /* Warn if the region isn't cacheline aligned. */
        if (offset & (L2_CACHE_BYTES - 1) || (size & (L2_CACHE_BYTES - 1)))
                pr_warn("Unaligned DMA to non-hfh memory: PA %#llx/%#lx\n",
                        PFN_PHYS(page_to_pfn(page)) + offset, size);
#endif
}

/* Make the page ready to be read by the core. */
static void __dma_complete_page(struct page *page, unsigned long offset,
                                size_t size, enum dma_data_direction direction)
{
#ifdef __tilegx__
        switch (page_home(page)) {
        case PAGE_HOME_HASH:
                /* I/O device delivered data the way the cpu wanted it. */
                break;
        case PAGE_HOME_INCOHERENT:
                /* Incoherent anyway, so no need to work hard here. */
                break;
        case PAGE_HOME_IMMUTABLE:
                /* Extra read-only copies are not a problem. */
                break;
        default:
                /* Flush the bogus hash-for-home I/O entries to memory. */
                homecache_finv_map_page(page, PAGE_HOME_HASH);
                break;
        }
#endif
}

static void __dma_prep_pa_range(dma_addr_t dma_addr, size_t size,
                                enum dma_data_direction direction)
{
        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

        while (size != 0) {
                __dma_prep_page(page, offset, bytes, direction);
                size -= bytes;
                ++page;
                offset = 0;
                bytes = min((size_t)PAGE_SIZE, size);
        }
}

static void __dma_complete_pa_range(dma_addr_t dma_addr, size_t size,
                                    enum dma_data_direction direction)
{
        struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
        unsigned long offset = dma_addr & (PAGE_SIZE - 1);
        size_t bytes = min(size, (size_t)(PAGE_SIZE - offset));

        while (size != 0) {
                __dma_complete_page(page, offset, bytes, direction);
                size -= bytes;
                ++page;
                offset = 0;
                bytes = min((size_t)PAGE_SIZE, size);
        }
}

static int tile_dma_map_sg(struct device *dev, struct scatterlist *sglist,
                           int nents, enum dma_data_direction direction,
                           struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));

        WARN_ON(nents == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_prep_pa_range(sg->dma_address, sg->length, direction);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
                sg->dma_length = sg->length;
#endif
        }

        return nents;
}

static void tile_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                              int nents, enum dma_data_direction direction,
                              struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_complete_pa_range(sg->dma_address, sg->length,
                                        direction);
        }
}

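/*
 * Illustrative scatterlist usage (hypothetical driver code: "sgl", "nents"
 * and program_hw_descriptor() are made-up driver names): the count returned
 * by dma_map_sg() is the number of entries to program into the hardware,
 * while dma_unmap_sg() takes the original nents; the bus address and length
 * of each entry are read back with sg_dma_address()/sg_dma_len().
 *
 *        int count = dma_map_sg(&pdev->dev, sgl, nents, DMA_TO_DEVICE);
 *        for_each_sg(sgl, sg, count, i)
 *                program_hw_descriptor(sg_dma_address(sg), sg_dma_len(sg));
 *        ...
 *        dma_unmap_sg(&pdev->dev, sgl, nents, DMA_TO_DEVICE);
 */
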
static dma_addr_t tile_dma_map_page(struct device *dev, struct page *page,
                                    unsigned long offset, size_t size,
                                    enum dma_data_direction direction,
                                    struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        BUG_ON(offset + size > PAGE_SIZE);
        __dma_prep_page(page, offset, size, direction);

        return page_to_pa(page) + offset;
}

static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
                                size_t size, enum dma_data_direction direction,
                                struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
                            dma_address & (PAGE_SIZE - 1), size, direction);
}

static void tile_dma_sync_single_for_cpu(struct device *dev,
                                         dma_addr_t dma_handle,
                                         size_t size,
                                         enum dma_data_direction direction)
{
        BUG_ON(!valid_dma_direction(direction));

        __dma_complete_pa_range(dma_handle, size, direction);
}

static void tile_dma_sync_single_for_device(struct device *dev,
                                            dma_addr_t dma_handle, size_t size,
                                            enum dma_data_direction direction)
{
        __dma_prep_pa_range(dma_handle, size, direction);
}

static void tile_dma_sync_sg_for_cpu(struct device *dev,
                                     struct scatterlist *sglist, int nelems,
                                     enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_cpu(dev, sg->dma_address,
                                        sg_dma_len(sg), direction);
        }
}

static void tile_dma_sync_sg_for_device(struct device *dev,
                                        struct scatterlist *sglist, int nelems,
                                        enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_device(dev, sg->dma_address,
                                           sg_dma_len(sg), direction);
        }
}

static inline int
tile_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return 0;
}

static inline int
tile_dma_supported(struct device *dev, u64 mask)
{
        return 1;
}

static struct dma_map_ops tile_default_dma_map_ops = {
        .alloc = tile_dma_alloc_coherent,
        .free = tile_dma_free_coherent,
        .map_page = tile_dma_map_page,
        .unmap_page = tile_dma_unmap_page,
        .map_sg = tile_dma_map_sg,
        .unmap_sg = tile_dma_unmap_sg,
        .sync_single_for_cpu = tile_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_dma_sync_sg_for_device,
        .mapping_error = tile_dma_mapping_error,
        .dma_supported = tile_dma_supported
};

struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops;
EXPORT_SYMBOL(tile_dma_map_ops);

/* Generic PCI DMA mapping functions */

static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
                                         dma_addr_t *dma_handle, gfp_t gfp,
                                         struct dma_attrs *attrs)
{
        int node = dev_to_node(dev);
        int order = get_order(size);
        struct page *pg;
        dma_addr_t addr;

        gfp |= __GFP_ZERO;

        pg = homecache_alloc_pages_node(node, gfp, order, PAGE_HOME_DMA);
        if (pg == NULL)
                return NULL;

        addr = page_to_phys(pg);

        *dma_handle = addr + get_dma_offset(dev);

        return page_address(pg);
}

/*
 * Free memory that was allocated with tile_pci_dma_alloc_coherent.
 */
static void tile_pci_dma_free_coherent(struct device *dev, size_t size,
                                       void *vaddr, dma_addr_t dma_handle,
                                       struct dma_attrs *attrs)
{
        homecache_free_pages((unsigned long)vaddr, get_order(size));
}

static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
                               int nents, enum dma_data_direction direction,
                               struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));

        WARN_ON(nents == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_prep_pa_range(sg->dma_address, sg->length, direction);

                sg->dma_address = sg->dma_address + get_dma_offset(dev);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
                sg->dma_length = sg->length;
#endif
        }

        return nents;
}

static void tile_pci_dma_unmap_sg(struct device *dev,
                                  struct scatterlist *sglist, int nents,
                                  enum dma_data_direction direction,
                                  struct dma_attrs *attrs)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        for_each_sg(sglist, sg, nents, i) {
                sg->dma_address = sg_phys(sg);
                __dma_complete_pa_range(sg->dma_address, sg->length,
                                        direction);
        }
}

static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
                                        unsigned long offset, size_t size,
                                        enum dma_data_direction direction,
                                        struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        BUG_ON(offset + size > PAGE_SIZE);
        __dma_prep_page(page, offset, size, direction);

        return page_to_pa(page) + offset + get_dma_offset(dev);
}

static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
                                    size_t size,
                                    enum dma_data_direction direction,
                                    struct dma_attrs *attrs)
{
        BUG_ON(!valid_dma_direction(direction));

        dma_address -= get_dma_offset(dev);

        __dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
                            dma_address & (PAGE_SIZE - 1), size, direction);
}

static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
                                             dma_addr_t dma_handle,
                                             size_t size,
                                             enum dma_data_direction direction)
{
        BUG_ON(!valid_dma_direction(direction));

        dma_handle -= get_dma_offset(dev);

        __dma_complete_pa_range(dma_handle, size, direction);
}

static void tile_pci_dma_sync_single_for_device(struct device *dev,
                                                dma_addr_t dma_handle,
                                                size_t size,
                                                enum dma_data_direction
                                                direction)
{
        dma_handle -= get_dma_offset(dev);

        __dma_prep_pa_range(dma_handle, size, direction);
}

static void tile_pci_dma_sync_sg_for_cpu(struct device *dev,
                                         struct scatterlist *sglist,
                                         int nelems,
                                         enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_cpu(dev, sg->dma_address,
                                        sg_dma_len(sg), direction);
        }
}

static void tile_pci_dma_sync_sg_for_device(struct device *dev,
                                            struct scatterlist *sglist,
                                            int nelems,
                                            enum dma_data_direction direction)
{
        struct scatterlist *sg;
        int i;

        BUG_ON(!valid_dma_direction(direction));
        WARN_ON(nelems == 0 || sglist->length == 0);

        for_each_sg(sglist, sg, nelems, i) {
                dma_sync_single_for_device(dev, sg->dma_address,
                                           sg_dma_len(sg), direction);
        }
}

static inline int
tile_pci_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
        return 0;
}

static inline int
tile_pci_dma_supported(struct device *dev, u64 mask)
{
        return 1;
}

static struct dma_map_ops tile_pci_default_dma_map_ops = {
        .alloc = tile_pci_dma_alloc_coherent,
        .free = tile_pci_dma_free_coherent,
        .map_page = tile_pci_dma_map_page,
        .unmap_page = tile_pci_dma_unmap_page,
        .map_sg = tile_pci_dma_map_sg,
        .unmap_sg = tile_pci_dma_unmap_sg,
        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
        .mapping_error = tile_pci_dma_mapping_error,
        .dma_supported = tile_pci_dma_supported
};

struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops;
EXPORT_SYMBOL(gx_pci_dma_map_ops);

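/*
 * The only difference from the generic functions above is that the PCI
 * variants add get_dma_offset(dev) when producing a bus address and
 * subtract it again when converting back.  As a purely illustrative
 * example with a made-up offset: if get_dma_offset(dev) returned
 * 0x8000000000, a page at CPU physical address 0x1000 would be handed to
 * the device as bus address 0x8000001000, and tile_pci_dma_unmap_page()
 * would subtract the offset again before looking up the struct page.
 */
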
/* PCI DMA mapping functions for legacy PCI devices */

#ifdef CONFIG_SWIOTLB
static void *tile_swiotlb_alloc_coherent(struct device *dev, size_t size,
                                         dma_addr_t *dma_handle, gfp_t gfp,
                                         struct dma_attrs *attrs)
{
        gfp |= GFP_DMA;
        return swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
}

static void tile_swiotlb_free_coherent(struct device *dev, size_t size,
                                       void *vaddr, dma_addr_t dma_addr,
                                       struct dma_attrs *attrs)
{
        swiotlb_free_coherent(dev, size, vaddr, dma_addr);
}

static struct dma_map_ops pci_swiotlb_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = swiotlb_map_page,
        .unmap_page = swiotlb_unmap_page,
        .map_sg = swiotlb_map_sg_attrs,
        .unmap_sg = swiotlb_unmap_sg_attrs,
        .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
        .sync_single_for_device = swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = swiotlb_sync_sg_for_device,
        .dma_supported = swiotlb_dma_supported,
        .mapping_error = swiotlb_dma_mapping_error,
};

static struct dma_map_ops pci_hybrid_dma_ops = {
        .alloc = tile_swiotlb_alloc_coherent,
        .free = tile_swiotlb_free_coherent,
        .map_page = tile_pci_dma_map_page,
        .unmap_page = tile_pci_dma_unmap_page,
        .map_sg = tile_pci_dma_map_sg,
        .unmap_sg = tile_pci_dma_unmap_sg,
        .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
        .sync_single_for_device = tile_pci_dma_sync_single_for_device,
        .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
        .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
        .mapping_error = tile_pci_dma_mapping_error,
        .dma_supported = tile_pci_dma_supported
};

struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
#else
struct dma_map_ops *gx_legacy_pci_dma_map_ops;
struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
#endif
EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);

#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
        struct dma_map_ops *dma_ops = get_dma_ops(dev);

        /*
         * For PCI devices with 64-bit DMA addressing capability, promote
         * the dma_ops to full capability for both streaming and consistent
         * memory access.  For 32-bit capable devices, limit the consistent
         * memory DMA range to max_direct_dma_addr.
         */
        if (dma_ops == gx_pci_dma_map_ops ||
            dma_ops == gx_hybrid_pci_dma_map_ops ||
            dma_ops == gx_legacy_pci_dma_map_ops) {
                if (mask == DMA_BIT_MASK(64))
                        set_dma_ops(dev, gx_pci_dma_map_ops);
                else if (mask > dev->archdata.max_direct_dma_addr)
                        mask = dev->archdata.max_direct_dma_addr;
        }

        if (!dma_supported(dev, mask))
                return -EIO;
        dev->coherent_dma_mask = mask;
        return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif

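/*
 * Illustrative sketch of the driver-side negotiation that ends up in the
 * hook above ("pdev" is a hypothetical PCI device): ask for 64-bit DMA
 * first and fall back to 32-bit; this hook then either promotes the
 * device to the full 64-bit dma_ops or clamps the coherent mask.
 *
 *        if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) == 0 &&
 *            dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)) == 0)
 *                dev_info(&pdev->dev, "using 64-bit DMA\n");
 *        else if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) ||
 *                 dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)))
 *                return -EIO;
 */
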
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
/*
 * The generic dma_get_required_mask() uses the highest physical address
 * (max_pfn) to hint to PCI drivers whether a 32-bit or 64-bit DMA
 * configuration is needed.  Since TILE-Gx has an I/O TLB/MMU that lets
 * DMA use the full 64-bit PCI address space rather than being limited
 * by the physical memory size, we always let PCI devices use 64-bit
 * DMA if they are capable of it, by returning the 64-bit DMA mask here.
 * The device driver can still fall back to 32-bit DMA if the device is
 * not capable of 64-bit DMA.
 */
u64 dma_get_required_mask(struct device *dev)
{
        return DMA_BIT_MASK(64);
}
EXPORT_SYMBOL_GPL(dma_get_required_mask);
#endif
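/*
 * Illustrative use of the hint above (hypothetical driver code; "hw" is an
 * assumed driver-private structure): a driver that supports both descriptor
 * formats can key its choice off dma_get_required_mask(), which on TILE-Gx
 * always reports 64 bits.
 *
 *        if (dma_get_required_mask(&pdev->dev) > DMA_BIT_MASK(32) &&
 *            dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) == 0)
 *                hw->use_64bit_descriptors = true;
 *        else if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
 *                return -EIO;
 */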