2 * This file implements the DMA operations for NVLink devices. The NPU
3 * devices all point to the same iommu table as the parent PCI device.
5 * Copyright Alistair Popple, IBM Corporation 2015.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of version 2 of the GNU General Public
9 * License as published by the Free Software Foundation.
12 #include <linux/export.h>
13 #include <linux/pci.h>
14 #include <linux/memblock.h>
16 #include <asm/iommu.h>
17 #include <asm/pnv-pci.h>
18 #include <asm/msi_bitmap.h>
25 * Other types of TCE cache invalidation are not functional in the kernel.
28 #define TCE_KILL_INVAL_ALL PPC_BIT(0)
30 static struct pci_dev *get_pci_dev(struct device_node *dn)
32 return PCI_DN(dn)->pcidev;
35 /* Given a NPU device get the associated PCI device. */
36 struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
38 struct device_node *dn;
39 struct pci_dev *gpdev;
41 /* Get assoicated PCI device */
42 dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
46 gpdev = get_pci_dev(dn);
51 EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
53 /* Given the real PCI device get a linked NPU device. */
54 struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
56 struct device_node *dn;
57 struct pci_dev *npdev;
59 /* Get assoicated PCI device */
60 dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
64 npdev = get_pci_dev(dn);
69 EXPORT_SYMBOL(pnv_pci_get_npu_dev);
71 #define NPU_DMA_OP_UNSUPPORTED() \
72 dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
75 static void *dma_npu_alloc(struct device *dev, size_t size,
76 dma_addr_t *dma_handle, gfp_t flag,
77 struct dma_attrs *attrs)
79 NPU_DMA_OP_UNSUPPORTED();
83 static void dma_npu_free(struct device *dev, size_t size,
84 void *vaddr, dma_addr_t dma_handle,
85 struct dma_attrs *attrs)
87 NPU_DMA_OP_UNSUPPORTED();
90 static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
91 unsigned long offset, size_t size,
92 enum dma_data_direction direction,
93 struct dma_attrs *attrs)
95 NPU_DMA_OP_UNSUPPORTED();
99 static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
100 int nelems, enum dma_data_direction direction,
101 struct dma_attrs *attrs)
103 NPU_DMA_OP_UNSUPPORTED();
107 static int dma_npu_dma_supported(struct device *dev, u64 mask)
109 NPU_DMA_OP_UNSUPPORTED();
113 static u64 dma_npu_get_required_mask(struct device *dev)
115 NPU_DMA_OP_UNSUPPORTED();
119 struct dma_map_ops dma_npu_ops = {
120 .map_page = dma_npu_map_page,
121 .map_sg = dma_npu_map_sg,
122 .alloc = dma_npu_alloc,
123 .free = dma_npu_free,
124 .dma_supported = dma_npu_dma_supported,
125 .get_required_mask = dma_npu_get_required_mask,
129 * Returns the PE assoicated with the PCI device of the given
130 * NPU. Returns the linked pci device if pci_dev != NULL.
132 static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
133 struct pci_dev **gpdev)
136 struct pci_controller *hose;
137 struct pci_dev *pdev;
138 struct pnv_ioda_pe *pe;
141 if (npe->flags & PNV_IODA_PE_PEER) {
145 pdev = pnv_pci_get_gpu_dev(npe->pdev);
149 pdn = pci_get_pdn(pdev);
150 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
153 hose = pci_bus_to_host(pdev->bus);
154 phb = hose->private_data;
155 pe = &phb->ioda.pe_array[pdn->pe_number];
164 void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
166 struct pnv_phb *phb = npe->phb;
168 if (WARN_ON(phb->type != PNV_PHB_NPU ||
169 !phb->ioda.tce_inval_reg ||
170 !(npe->flags & PNV_IODA_PE_DEV)))
173 mb(); /* Ensure previous TCE table stores are visible */
174 __raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
175 phb->ioda.tce_inval_reg);
178 void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
179 struct iommu_table *tbl,
181 unsigned long npages,
184 struct pnv_phb *phb = npe->phb;
186 /* We can only invalidate the whole cache on NPU */
187 unsigned long val = TCE_KILL_INVAL_ALL;
189 if (WARN_ON(phb->type != PNV_PHB_NPU ||
190 !phb->ioda.tce_inval_reg ||
191 !(npe->flags & PNV_IODA_PE_DEV)))
194 mb(); /* Ensure previous TCE table stores are visible */
196 __raw_rm_writeq(cpu_to_be64(val),
197 (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
199 __raw_writeq(cpu_to_be64(val),
200 phb->ioda.tce_inval_reg);
203 void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
205 struct pnv_ioda_pe *gpe;
206 struct pci_dev *gpdev;
209 if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
212 gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
216 for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
217 /* Nothing to do if the PE is already connected. */
218 if (gpe->peers[i] == npe)
225 if (WARN_ON(avail < 0))
228 gpe->peers[avail] = npe;
229 gpe->flags |= PNV_IODA_PE_PEER;
232 * We assume that the NPU devices only have a single peer PE
233 * (the GPU PCIe device PE).
236 npe->flags |= PNV_IODA_PE_PEER;
240 * For the NPU we want to point the TCE table at the same table as the
243 static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
245 struct pnv_phb *phb = npe->phb;
246 struct pci_dev *gpdev;
247 struct pnv_ioda_pe *gpe;
253 * Find the assoicated PCI devices and get the dma window
254 * information from there.
256 if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
259 gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
263 addr = (void *)gpe->table_group.tables[0]->it_base;
264 size = gpe->table_group.tables[0]->it_size << 3;
265 rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
266 npe->pe_number, 1, __pa(addr),
268 if (rc != OPAL_SUCCESS)
269 pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
270 __func__, rc, phb->hose->global_number, npe->pe_number);
273 * We don't initialise npu_pe->tce32_table as we always use
274 * dma_npu_ops which are nops.
276 set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
280 * Enable/disable bypass mode on the NPU. The NPU only supports one
281 * window per link, so bypass needs to be explicitly enabled or
282 * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
283 * active at the same time.
285 int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
287 struct pnv_phb *phb = npe->phb;
290 if (phb->type != PNV_PHB_NPU || !npe->pdev)
294 /* Enable the bypass window */
295 phys_addr_t top = memblock_end_of_DRAM();
297 npe->tce_bypass_base = 0;
298 top = roundup_pow_of_two(top);
299 dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
301 rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
302 npe->pe_number, npe->pe_number,
303 npe->tce_bypass_base, top);
306 * Disable the bypass window by replacing it with the
309 pnv_npu_disable_bypass(npe);
315 int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
317 struct pci_controller *hose = pci_bus_to_host(npdev->bus);
318 struct pnv_phb *phb = hose->private_data;
319 struct pci_dn *pdn = pci_get_pdn(npdev);
320 struct pnv_ioda_pe *npe, *gpe;
321 struct pci_dev *gpdev;
325 if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
328 /* We only do bypass if it's enabled on the linked device */
329 npe = &phb->ioda.pe_array[pdn->pe_number];
330 gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
334 if (gpe->tce_bypass_enabled) {
335 top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
336 bypass = (dma_mask >= top);
340 dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
342 dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
344 pnv_npu_dma_set_bypass(npe, bypass);
345 *npdev->dev.dma_mask = dma_mask;