Merge tag 'ntb-4.5' of git://github.com/jonmason/ntb
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Jan 2016 00:00:52 +0000 (16:00 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Jan 2016 00:00:52 +0000 (16:00 -0800)
Pull NTB updates from Jon Mason:
 "A new driver to support AMD NTB, an NTB performance test driver, NTB
  bug fixes, and the ability to recover from running out of DMA
  descriptors"

* tag 'ntb-4.5' of git://github.com/jonmason/ntb:
  NTB: Fix macro parameter conflict with field name
  NTB: Add support for AMD PCI-Express Non-Transparent Bridge
  ntb: ntb perf tool
  NTB: Address out of DMA descriptor issue with NTB
  NTB: Clear property bits in BAR value
  NTB: ntb_process_tx error path bug

13 files changed:
MAINTAINERS
drivers/ntb/hw/Kconfig
drivers/ntb/hw/Makefile
drivers/ntb/hw/amd/Kconfig [new file with mode: 0644]
drivers/ntb/hw/amd/Makefile [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.c [new file with mode: 0644]
drivers/ntb/hw/amd/ntb_hw_amd.h [new file with mode: 0644]
drivers/ntb/hw/intel/ntb_hw_intel.c
drivers/ntb/hw/intel/ntb_hw_intel.h
drivers/ntb/ntb_transport.c
drivers/ntb/test/Kconfig
drivers/ntb/test/Makefile
drivers/ntb/test/ntb_perf.c [new file with mode: 0644]

index fc17185..8f3f93c 100644 (file)
@@ -7702,6 +7702,12 @@ W:       https://github.com/jonmason/ntb/wiki
 T:     git git://github.com/jonmason/ntb.git
 F:     drivers/ntb/hw/intel/
 
+NTB AMD DRIVER
+M:     Xiangliang Yu <Xiangliang.Yu@amd.com>
+L:     linux-ntb@googlegroups.com
+S:     Supported
+F:     drivers/ntb/hw/amd/
+
 NTFS FILESYSTEM
 M:     Anton Altaparmakov <anton@tuxera.com>
 L:     linux-ntfs-dev@lists.sourceforge.net
index 4d5535c..7116472 100644 (file)
@@ -1 +1,2 @@
+source "drivers/ntb/hw/amd/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"
index 175d7c9..532e085 100644 (file)
@@ -1 +1,2 @@
+obj-$(CONFIG_NTB_AMD)  += amd/
 obj-$(CONFIG_NTB_INTEL)        += intel/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644 (file)
index 0000000..cfe903c
--- /dev/null
@@ -0,0 +1,7 @@
+config NTB_AMD
+       tristate "AMD Non-Transparent Bridge support"
+       depends on X86_64
+       help
+        This driver supports AMD NTB on capable Zeppelin hardware.
+
+        If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644 (file)
index 0000000..ad54da9
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644 (file)
index 0000000..588803a
--- /dev/null
@@ -0,0 +1,1143 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+/* Driver name, description and version strings. */
+#define NTB_NAME       "ntb_hw_amd"
+#define NTB_DESC       "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER                "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+/* File operations attached to each device's debugfs "info" file. */
+static const struct file_operations amd_ntb_debugfs_info;
+/* Parent debugfs directory; per-device directories are created below it. */
+static struct dentry *debugfs_dir;
+
+/*
+ * Translate a memory window index into its PCI BAR number.
+ *
+ * Valid indices are 0 .. mw_count - 1; window idx maps to BAR (1 << idx).
+ *
+ * Returns the BAR number, or -EINVAL when idx is out of range.
+ */
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+       /* idx == mw_count is one past the last window, so reject it too */
+       if (idx < 0 || idx >= ndev->mw_count)
+               return -EINVAL;
+
+       return 1 << idx;
+}
+
+/* Return the number of memory windows recorded for this device. */
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->mw_count;
+}
+
+/*
+ * Describe memory window idx.
+ *
+ * Each output pointer is optional (may be NULL). On success *base and *size
+ * receive the start and length of the backing PCI BAR, *align is SZ_4K and
+ * *align_size is 1.
+ *
+ * Returns 0 on success or the negative error from ndev_mw_to_bar().
+ */
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+                               phys_addr_t *base,
+                               resource_size_t *size,
+                               resource_size_t *align,
+                               resource_size_t *align_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       struct pci_dev *pdev = ndev->ntb.pdev;
+       int bar = ndev_mw_to_bar(ndev, idx);
+
+       if (bar < 0)
+               return bar;
+
+       if (base)
+               *base = pci_resource_start(pdev, bar);
+       if (size)
+               *size = pci_resource_len(pdev, bar);
+       if (align)
+               *align = SZ_4K;
+       if (align_size)
+               *align_size = 1;
+
+       return 0;
+}
+
+/*
+ * Program the address translation and limit registers for memory window idx.
+ *
+ * The translation address is written through peer_mmio and the limit through
+ * self_mmio; each write is read back and compared. On a mismatch the
+ * registers touched so far are rewritten (translation to 0, limit to the BAR
+ * start) and -EIO is returned. BAR 1 takes a separate path that rejects
+ * address ranges above 32 bits and accesses its limit register with 32-bit
+ * readl()/writel().
+ *
+ * Returns 0 on success; -EINVAL for an invalid idx, a size larger than the
+ * BAR, or (BAR 1 only) a range that does not fit in 32 bits; -EIO when a
+ * register read-back does not match what was written.
+ *
+ * NOTE(review): the BAR 1 path sets the translation with write64() but
+ * clears it with writel() on limit failure - confirm the register width.
+ */
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+                               dma_addr_t addr, resource_size_t size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       unsigned long xlat_reg, limit_reg = 0;
+       resource_size_t mw_size;
+       void __iomem *mmio, *peer_mmio;
+       u64 base_addr, limit, reg_val;
+       int bar;
+
+       bar = ndev_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+       /* make sure the range fits in the usable mw size */
+       if (size > mw_size)
+               return -EINVAL;
+
+       mmio = ndev->self_mmio;
+       peer_mmio = ndev->peer_mmio;
+
+       base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+       if (bar != 1) {
+               xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+               limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               write64(limit, mmio + limit_reg);
+               reg_val = read64(mmio + limit_reg);
+               if (reg_val != limit) {
+                       write64(base_addr, mmio + limit_reg);
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       } else {
+               xlat_reg = AMD_BAR1XLAT_OFFSET;
+               limit_reg = AMD_BAR1LMT_OFFSET;
+
+               /* split bar addr range must all be 32 bit */
+               if (addr & (~0ull << 32))
+                       return -EINVAL;
+               if ((addr + size) & (~0ull << 32))
+                       return -EINVAL;
+
+               /* Set the limit if supported */
+               limit = base_addr + size;
+
+               /* set and verify setting the translation address */
+               write64(addr, peer_mmio + xlat_reg);
+               reg_val = read64(peer_mmio + xlat_reg);
+               if (reg_val != addr) {
+                       write64(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+
+               /* set and verify setting the limit */
+               writel(limit, mmio + limit_reg);
+               reg_val = readl(mmio + limit_reg);
+               if (reg_val != limit) {
+                       writel(base_addr, mmio + limit_reg);
+                       writel(0, peer_mmio + xlat_reg);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Report the link state from cached status, consuming pending peer events.
+ *
+ * With no peer event recorded (peer_sta == 0) the result is
+ * NTB_LNK_STA_ACTIVE() of the cached cntl_sta. Otherwise the link is
+ * reported as down (0); a pending reset event bit is cleared, or, for a D0
+ * event, peer_sta is zeroed.
+ */
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+       if (!ndev->peer_sta)
+               return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+       /* If peer_sta is reset or D0 event, the ISR has
+        * started a timer to check link status of hardware.
+        * So here just clear status bit. And if peer_sta is
+        * D3 or PME_TO, D0/reset event will be happened when
+        * system wakeup/poweron, so do nothing here.
+        */
+       if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+               ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+       else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+               ndev->peer_sta = 0;
+
+       return 0;
+}
+
+/*
+ * Report whether the link is up and, optionally, its speed and width.
+ *
+ * speed and width may be NULL. When the link is down they receive
+ * NTB_SPEED_NONE / NTB_WIDTH_NONE; when it is up they are decoded from the
+ * cached lnk_sta value.
+ *
+ * Returns 1 if the link is up, 0 otherwise.
+ */
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+                             enum ntb_speed *speed,
+                             enum ntb_width *width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (!amd_link_is_up(ndev)) {
+               if (speed)
+                       *speed = NTB_SPEED_NONE;
+               if (width)
+                       *width = NTB_WIDTH_NONE;
+
+               dev_dbg(ndev_dev(ndev), "link is down.\n");
+               return 0;
+       }
+
+       if (speed)
+               *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+       if (width)
+               *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+       dev_dbg(ndev_dev(ndev), "link is up.\n");
+       return 1;
+}
+
+/*
+ * Unmask event interrupts and set the PMM/SMM control bits.
+ *
+ * max_speed and max_width are not used. The interrupt mask is updated for
+ * every topology; only a non-secondary device goes on to program the control
+ * register.
+ *
+ * Returns 0 on success, -EINVAL when the topology is NTB_TOPO_SEC.
+ */
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+                              enum ntb_speed max_speed,
+                              enum ntb_width max_width)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *regs = ndev->self_mmio;
+       u32 ctl;
+
+       /* Enable event interrupt */
+       ndev->int_mask &= ~AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, regs + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+
+       dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+       ctl = readl(regs + AMD_CNTL_OFFSET);
+       ctl |= (PMM_REG_CTL | SMM_REG_CTL);
+       writel(ctl, regs + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+/*
+ * Mask event interrupts and clear the PMM/SMM control bits.
+ *
+ * The interrupt mask is updated for every topology; only a non-secondary
+ * device goes on to program the control register.
+ *
+ * Returns 0 on success, -EINVAL when the topology is NTB_TOPO_SEC.
+ */
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 ntb_ctl;
+
+       /* Disable event interrupt */
+       ndev->int_mask |= AMD_EVENT_INTMASK;
+       writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+       if (ndev->ntb.topo == NTB_TOPO_SEC)
+               return -EINVAL;
+       /* The message used to read "Enabling Link.", copied from the enable path */
+       dev_dbg(ndev_dev(ndev), "Disabling Link.\n");
+
+       ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+       ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+       writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+       return 0;
+}
+
+/* Return the cached mask of valid doorbell bits. */
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->db_valid_mask;
+}
+
+/* Return the doorbell count recorded for this device. */
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->db_count;
+}
+
+/*
+ * Return the doorbell bit associated with interrupt vector db_vector.
+ *
+ * Valid vectors are 0 .. db_count - 1. Returns 0 for an out-of-range vector
+ * or when the corresponding bit is not part of db_valid_mask.
+ */
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       /* db_vector == db_count is one past the last vector */
+       if (db_vector < 0 || db_vector >= ndev->db_count)
+               return 0;
+
+       /* Build the bit as a 64-bit value to match the u64 mask */
+       return ndev->db_valid_mask & BIT_ULL(db_vector);
+}
+
+/* Read the 16-bit doorbell status register and return it widened to u64. */
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       u16 pending;
+
+       pending = readw(ndev->self_mmio + AMD_DBSTAT_OFFSET);
+
+       return (u64)pending;
+}
+
+/*
+ * Write the low 16 bits of db_bits to the doorbell status register.
+ *
+ * Always returns 0.
+ */
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       u16 bits = (u16)db_bits;
+
+       writew(bits, ndev->self_mmio + AMD_DBSTAT_OFFSET);
+
+       return 0;
+}
+
+/*
+ * Set bits in the doorbell mask.
+ *
+ * ORs db_bits into the cached db_mask and writes its low 16 bits to the
+ * doorbell mask register, both under db_mask_lock.
+ *
+ * Returns 0 on success, -EINVAL if db_bits has bits outside db_valid_mask.
+ */
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *regs = ndev->self_mmio;
+       unsigned long irq_flags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, irq_flags);
+
+       ndev->db_mask |= db_bits;
+       writew((u16)ndev->db_mask, regs + AMD_DBMASK_OFFSET);
+
+       spin_unlock_irqrestore(&ndev->db_mask_lock, irq_flags);
+
+       return 0;
+}
+
+/*
+ * Clear bits in the doorbell mask.
+ *
+ * Removes db_bits from the cached db_mask and writes its low 16 bits to the
+ * doorbell mask register, both under db_mask_lock.
+ *
+ * Returns 0 on success, -EINVAL if db_bits has bits outside db_valid_mask.
+ */
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *regs = ndev->self_mmio;
+       unsigned long irq_flags;
+
+       if (db_bits & ~ndev->db_valid_mask)
+               return -EINVAL;
+
+       spin_lock_irqsave(&ndev->db_mask_lock, irq_flags);
+
+       ndev->db_mask &= ~db_bits;
+       writew((u16)ndev->db_mask, regs + AMD_DBMASK_OFFSET);
+
+       spin_unlock_irqrestore(&ndev->db_mask_lock, irq_flags);
+
+       return 0;
+}
+
+/*
+ * Report the address and size of the peer doorbell request register.
+ *
+ * Both output pointers are optional. *db_size is set to sizeof(u32).
+ * Always returns 0.
+ *
+ * NOTE(review): *db_addr is produced by casting the peer_mmio mapping
+ * pointer plus AMD_DBREQ_OFFSET to phys_addr_t. peer_mmio is used as a
+ * readl()/write64() target elsewhere in this file, so this looks like a
+ * kernel virtual address rather than a physical one - confirm.
+ */
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+                               phys_addr_t *db_addr,
+                               resource_size_t *db_size)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (db_addr)
+               *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+       if (db_size)
+               *db_size = sizeof(u32);
+
+       return 0;
+}
+
+/*
+ * Write the low 16 bits of db_bits to the doorbell request register.
+ *
+ * Always returns 0.
+ */
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       u16 bits = (u16)db_bits;
+
+       writew(bits, ndev->self_mmio + AMD_DBREQ_OFFSET);
+
+       return 0;
+}
+
+/* Return the scratchpad count recorded for this device. */
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->spad_count;
+}
+
+/*
+ * Read local scratchpad register idx.
+ *
+ * Returns the register value, or 0 when idx is out of range.
+ */
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *spads = ndev->self_mmio + AMD_SPAD_OFFSET;
+       u32 off;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return 0;
+
+       /* scratchpads are 4 bytes apart, starting at this side's base */
+       off = ndev->self_spad + (idx << 2);
+
+       return readl(spads + off);
+}
+
+/*
+ * Write val to local scratchpad register idx.
+ *
+ * Returns 0 on success, -EINVAL when idx is out of range.
+ */
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+                             int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *spads = ndev->self_mmio + AMD_SPAD_OFFSET;
+       u32 off;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       /* scratchpads are 4 bytes apart, starting at this side's base */
+       off = ndev->self_spad + (idx << 2);
+       writel(val, spads + off);
+
+       return 0;
+}
+
+/*
+ * Report the address of peer scratchpad register idx.
+ *
+ * spad_addr is optional. Returns 0 on success, -EINVAL when idx is out of
+ * range.
+ *
+ * NOTE(review): the address is produced by casting the self_mmio mapping
+ * pointer plus offsets to phys_addr_t. self_mmio is used as a
+ * readl()/writel() target elsewhere in this file, so this looks like a
+ * kernel virtual address rather than a physical one - confirm.
+ */
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+                                 phys_addr_t *spad_addr)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       if (spad_addr)
+               *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+                                          ndev->peer_spad + (idx << 2));
+       return 0;
+}
+
+/*
+ * Read peer scratchpad register idx.
+ *
+ * Returns the register value, or 0 when idx is out of range, matching
+ * amd_ntb_spad_read().
+ */
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *mmio = ndev->self_mmio;
+       u32 offset;
+
+       /* The return type is u32: a negative errno would read as a bogus value */
+       if (idx < 0 || idx >= ndev->spad_count)
+               return 0;
+
+       offset = ndev->peer_spad + (idx << 2);
+       return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+/*
+ * Write val to peer scratchpad register idx.
+ *
+ * Returns 0 on success, -EINVAL when idx is out of range.
+ */
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+                                  int idx, u32 val)
+{
+       struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+       void __iomem *spads = ndev->self_mmio + AMD_SPAD_OFFSET;
+       u32 off;
+
+       if (idx < 0 || idx >= ndev->spad_count)
+               return -EINVAL;
+
+       /* scratchpads are 4 bytes apart, starting at the peer's base */
+       off = ndev->peer_spad + (idx << 2);
+       writel(val, spads + off);
+
+       return 0;
+}
+
+/*
+ * NTB device operations implemented by this driver; installed into
+ * ndev->ntb.ops by ndev_init_struct().
+ */
+static const struct ntb_dev_ops amd_ntb_ops = {
+       .mw_count               = amd_ntb_mw_count,
+       .mw_get_range           = amd_ntb_mw_get_range,
+       .mw_set_trans           = amd_ntb_mw_set_trans,
+       .link_is_up             = amd_ntb_link_is_up,
+       .link_enable            = amd_ntb_link_enable,
+       .link_disable           = amd_ntb_link_disable,
+       .db_valid_mask          = amd_ntb_db_valid_mask,
+       .db_vector_count        = amd_ntb_db_vector_count,
+       .db_vector_mask         = amd_ntb_db_vector_mask,
+       .db_read                = amd_ntb_db_read,
+       .db_clear               = amd_ntb_db_clear,
+       .db_set_mask            = amd_ntb_db_set_mask,
+       .db_clear_mask          = amd_ntb_db_clear_mask,
+       .peer_db_addr           = amd_ntb_peer_db_addr,
+       .peer_db_set            = amd_ntb_peer_db_set,
+       .spad_count             = amd_ntb_spad_count,
+       .spad_read              = amd_ntb_spad_read,
+       .spad_write             = amd_ntb_spad_write,
+       .peer_spad_addr         = amd_ntb_peer_spad_addr,
+       .peer_spad_read         = amd_ntb_peer_spad_read,
+       .peer_spad_write        = amd_ntb_peer_spad_write,
+};
+
+/*
+ * Acknowledge an event: set bit in the SMU ack register (read-modify-write)
+ * and record it in the cached peer_sta.
+ */
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+       void __iomem *ack_reg = ndev->self_mmio + AMD_SMUACK_OFFSET;
+       int ack;
+
+       ack = readl(ack_reg);
+       ack |= bit;
+       writel(ack, ack_reg);
+
+       ndev->peer_sta |= bit;
+}
+
+/*
+ * Decode and service a pending event interrupt.
+ *
+ * Reads the interrupt status register and returns immediately if no bit in
+ * AMD_EVENT_INTMASK is set. Otherwise the masked status is matched against
+ * the known peer events: reset, D3 and PME_TO are acknowledged through
+ * amd_ack_smu() and reported with ntb_link_event(); D0 is acknowledged only.
+ * Reset and D0 also schedule the hb_timer work to poll the link. Any other
+ * value is only logged. vec is used for the debug message only.
+ *
+ * NOTE(review): the switch compares the whole masked status against the
+ * individual event constants, so if those are single bits a status with
+ * several of them set lands in the default case - confirm that the hardware
+ * reports one event at a time.
+ */
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+       void __iomem *mmio = ndev->self_mmio;
+       u32 status;
+
+       status = readl(mmio + AMD_INTSTAT_OFFSET);
+       if (!(status & AMD_EVENT_INTMASK))
+               return;
+
+       dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+       status &= AMD_EVENT_INTMASK;
+       switch (status) {
+       case AMD_PEER_FLUSH_EVENT:
+               dev_info(ndev_dev(ndev), "Flush is done.\n");
+               break;
+       case AMD_PEER_RESET_EVENT:
+               amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+               /* link down first */
+               ntb_link_event(&ndev->ntb);
+               /* polling peer status */
+               schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+               break;
+       case AMD_PEER_D3_EVENT:
+       case AMD_PEER_PMETO_EVENT:
+               amd_ack_smu(ndev, status);
+
+               /* link down */
+               ntb_link_event(&ndev->ntb);
+
+               break;
+       case AMD_PEER_D0_EVENT:
+               mmio = ndev->peer_mmio;
+               status = readl(mmio + AMD_PMESTAT_OFFSET);
+               /* check if this is WAKEUP event */
+               if (status & 0x1)
+                       dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+               amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+               /* start a timer to poll link status */
+               schedule_delayed_work(&ndev->hb_timer,
+                                     AMD_LINK_HB_TIMEOUT);
+               break;
+       default:
+               dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+               break;
+       }
+}
+
+/*
+ * Common interrupt body shared by the per-vector and single-IRQ handlers.
+ *
+ * Vectors at or above AMD_DB_CNT are treated as event vectors; when only one
+ * vector is in use, events are checked on every interrupt. Vectors below
+ * AMD_DB_CNT are reported as doorbell events. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+       bool single_vector = (ndev->msix_vec_count == 1);
+
+       dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+       if (vec >= AMD_DB_CNT || single_vector)
+               amd_handle_event(ndev, vec);
+
+       if (vec < AMD_DB_CNT)
+               ntb_db_event(&ndev->ntb, vec);
+
+       return IRQ_HANDLED;
+}
+
+/* Per-vector handler: dev is the struct amd_ntb_vec registered for the IRQ. */
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+       struct amd_ntb_vec *entry = dev;
+       struct amd_ntb_dev *ndev = entry->ndev;
+
+       return ndev_interrupt(ndev, entry->num);
+}
+
+/*
+ * Single-IRQ handler: dev is the struct amd_ntb_dev itself. The vector
+ * number passed on is the IRQ's offset from the device's base IRQ.
+ */
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+       struct amd_ntb_dev *ndev = dev;
+       struct pci_dev *pdev = ndev_pdev(ndev);
+
+       return ndev_interrupt(ndev, irq - pdev->irq);
+}
+
+/*
+ * Set up interrupt delivery, trying MSI-X first, then MSI, then legacy INTx.
+ *
+ * For MSI-X, between msix_min and msix_max vectors are requested and each
+ * granted vector gets its own struct amd_ntb_vec as the IRQ cookie. If any
+ * MSI-X step fails, everything acquired so far is released and the function
+ * falls back to a single MSI interrupt, and finally to a shared INTx
+ * interrupt; both fallbacks use ndev itself as the IRQ cookie and set
+ * db_count and msix_vec_count to 1.
+ *
+ * All doorbells are masked in the cached db_mask before any handler is
+ * installed.
+ *
+ * Returns 0 on success, or the request_irq() error of the INTx attempt when
+ * every method failed.
+ */
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+                        int msix_min, int msix_max)
+{
+       struct pci_dev *pdev;
+       int rc, i, msix_count, node;
+
+       pdev = ndev_pdev(ndev);
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev->db_mask = ndev->db_valid_mask;
+
+       /* Try to set up msix irq */
+       ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+                                GFP_KERNEL, node);
+       if (!ndev->vec)
+               goto err_msix_vec_alloc;
+
+       ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+                                 GFP_KERNEL, node);
+       if (!ndev->msix)
+               goto err_msix_alloc;
+
+       for (i = 0; i < msix_max; ++i)
+               ndev->msix[i].entry = i;
+
+       msix_count = pci_enable_msix_range(pdev, ndev->msix,
+                                          msix_min, msix_max);
+       if (msix_count < 0)
+               goto err_msix_enable;
+
+       /* NOTE: Disable MSIX if msix count is less than 16 because of
+        * hardware limitation.
+        */
+       if (msix_count < msix_min) {
+               pci_disable_msix(pdev);
+               goto err_msix_enable;
+       }
+
+       for (i = 0; i < msix_count; ++i) {
+               ndev->vec[i].ndev = ndev;
+               ndev->vec[i].num = i;
+               rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+                                "ndev_vec_isr", &ndev->vec[i]);
+               if (rc)
+                       goto err_msix_request;
+       }
+
+       dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+       ndev->db_count = msix_min;
+       /* Record the number of vectors actually requested above, so that
+        * ndev_deinit_isr() frees exactly those and no more.
+        */
+       ndev->msix_vec_count = msix_count;
+       return 0;
+
+err_msix_request:
+       /* free_irq() must get the same cookie that request_irq() was given */
+       while (i-- > 0)
+               free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+       pci_disable_msix(pdev);
+err_msix_enable:
+       kfree(ndev->msix);
+err_msix_alloc:
+       kfree(ndev->vec);
+err_msix_vec_alloc:
+       ndev->msix = NULL;
+       ndev->vec = NULL;
+
+       /* Try to set up msi irq */
+       rc = pci_enable_msi(pdev);
+       if (rc)
+               goto err_msi_enable;
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_msi_request;
+
+       dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_msi_request:
+       pci_disable_msi(pdev);
+err_msi_enable:
+
+       /* Try to set up intx irq */
+       pci_intx(pdev, 1);
+
+       rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+                        "ndev_irq_isr", ndev);
+       if (rc)
+               goto err_intx_request;
+
+       dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+       ndev->db_count = 1;
+       ndev->msix_vec_count = 1;
+       return 0;
+
+err_intx_request:
+       return rc;
+}
+
+/*
+ * Undo ndev_init_isr(): mask all doorbells, then release the interrupts.
+ *
+ * When the MSI-X table exists (ndev->msix != NULL), msix_vec_count vectors
+ * are freed with their struct amd_ntb_vec cookies, MSI-X is disabled and
+ * both arrays are freed. Otherwise the single IRQ is freed and either MSI is
+ * disabled or INTx is switched off.
+ *
+ * NOTE(review): the doorbell mask register is written with writel() here
+ * but with writew() in the db_set_mask/db_clear_mask paths - confirm the
+ * intended access width.
+ */
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev;
+       void __iomem *mmio = ndev->self_mmio;
+       int i;
+
+       pdev = ndev_pdev(ndev);
+
+       /* Mask all doorbell interrupts */
+       ndev->db_mask = ndev->db_valid_mask;
+       writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+       if (ndev->msix) {
+               i = ndev->msix_vec_count;
+               while (i--)
+                       free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+               pci_disable_msix(pdev);
+               kfree(ndev->msix);
+               kfree(ndev->vec);
+       } else {
+               free_irq(pdev->irq, ndev);
+               if (pci_dev_msi_enabled(pdev))
+                       pci_disable_msi(pdev);
+               else
+                       pci_intx(pdev, 0);
+       }
+}
+
+/*
+ * debugfs read handler: format a text snapshot of the device state.
+ *
+ * The report is built in a temporary buffer of at most 0x800 bytes (or
+ * count, if smaller) and the requested part is copied to user space with
+ * simple_read_from_buffer(). It covers topology, link status, the
+ * window/scratchpad/doorbell/vector counts, the doorbell mask and status
+ * registers, and the BAR translation and limit registers read through
+ * self_mmio.
+ *
+ * Returns the simple_read_from_buffer() result, or -ENOMEM if the temporary
+ * buffer cannot be allocated.
+ *
+ * NOTE(review): this calls amd_link_is_up(), which clears pending event
+ * bits in ndev->peer_sta, so reading the file is not free of side effects.
+ */
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct amd_ntb_dev *ndev;
+       void __iomem *mmio;
+       char *buf;
+       size_t buf_size;
+       ssize_t ret, off;
+       union { u64 v64; u32 v32; u16 v16; } u;
+
+       ndev = filp->private_data;
+       mmio = ndev->self_mmio;
+
+       buf_size = min(count, 0x800ul);
+
+       buf = kmalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       off = 0;
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "NTB Device Information:\n");
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Connection Topology -\t%s\n",
+                        ntb_topo_string(ndev->ntb.topo));
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+       if (!amd_link_is_up(ndev)) {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tDown\n");
+       } else {
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Status -\t\tUp\n");
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Speed -\t\tPCI-E Gen %u\n",
+                                NTB_LNK_STA_SPEED(ndev->lnk_sta));
+               off += scnprintf(buf + off, buf_size - off,
+                                "Link Width -\t\tx%u\n",
+                                NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+       }
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Memory Window Count -\t%u\n", ndev->mw_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Scratchpad Count -\t%u\n", ndev->spad_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Count -\t%u\n", ndev->db_count);
+       off += scnprintf(buf + off, buf_size - off,
+                        "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+       u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+       u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+       off += scnprintf(buf + off, buf_size - off,
+                        "\nNTB Incoming XLAT:\n");
+
+       u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT1 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT23 -\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "XLAT45 -\t\t%#018llx\n", u.v64);
+
+       u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT1 -\t\t\t%#06x\n", u.v32);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+       u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+       off += scnprintf(buf + off, buf_size - off,
+                        "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+       ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+       kfree(buf);
+       return ret;
+}
+
+/*
+ * Create this device's debugfs directory and its "info" file.
+ *
+ * Both handles stay NULL when the driver-wide debugfs_dir is missing; the
+ * file handle stays NULL when the per-device directory cannot be created.
+ */
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+       ndev->debugfs_dir = NULL;
+       ndev->debugfs_info = NULL;
+
+       if (!debugfs_dir)
+               return;
+
+       ndev->debugfs_dir = debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+       if (!ndev->debugfs_dir)
+               return;
+
+       ndev->debugfs_info = debugfs_create_file("info", S_IRUSR,
+                                                ndev->debugfs_dir, ndev,
+                                                &amd_ntb_debugfs_info);
+}
+
+/* Remove this device's debugfs directory and everything beneath it. */
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+       struct dentry *dir = ndev->debugfs_dir;
+
+       debugfs_remove_recursive(dir);
+}
+
+/*
+ * Fill in the software-only fields of a freshly allocated device: the
+ * embedded ntb_dev (PCI device, topology NTB_TOPO_NONE, ops table), the
+ * initial interrupt mask and the doorbell mask lock.
+ */
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+                                   struct pci_dev *pdev)
+{
+       struct ntb_dev *ntb = &ndev->ntb;
+
+       ntb->pdev = pdev;
+       ntb->topo = NTB_TOPO_NONE;
+       ntb->ops = &amd_ntb_ops;
+
+       ndev->int_mask = AMD_EVENT_INTMASK;
+       spin_lock_init(&ndev->db_mask_lock);
+}
+
+/*
+ * Sample the link state and refresh the cached copies.
+ *
+ * Reads the side-info register through peer_mmio, keeps only
+ * NTB_LIN_STA_ACTIVE_BIT and compares it with the cached cntl_sta. When it
+ * differs, cntl_sta is updated and lnk_sta is reloaded from PCI config
+ * space.
+ *
+ * Returns 1 when the state changed and lnk_sta was refreshed, 0 when nothing
+ * changed or the config read failed.
+ */
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+       void __iomem *peer_regs = ndev->peer_mmio;
+       u32 active, link;
+
+       active = readl(peer_regs + AMD_SIDEINFO_OFFSET) &
+                NTB_LIN_STA_ACTIVE_BIT;
+
+       dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, active);
+
+       if (active == ndev->cntl_sta)
+               return 0;
+
+       ndev->cntl_sta = active;
+
+       if (pci_read_config_dword(ndev->ntb.pdev,
+                                 AMD_LINK_STATUS_OFFSET, &link))
+               return 0;
+
+       ndev->lnk_sta = link;
+
+       return 1;
+}
+
+/*
+ * Delayed-work handler that polls the link.
+ *
+ * Raises a link event when amd_poll_link() reports a change, and re-arms
+ * itself for as long as the link is not up.
+ */
+static void amd_link_hb(struct work_struct *work)
+{
+       struct amd_ntb_dev *ndev = hb_ndev(work);
+       int changed = amd_poll_link(ndev);
+
+       if (changed)
+               ntb_link_event(&ndev->ntb);
+
+       if (amd_link_is_up(ndev))
+               return;
+
+       schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+/*
+ * Set up interrupts, asking for between AMD_DB_CNT and AMD_MSIX_VECTOR_CNT
+ * MSI-X vectors. Returns the ndev_init_isr() result.
+ */
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+       int rc = ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+
+       return rc;
+}
+
+/* Set AMD_SIDE_READY in the side-info register unless it is already set. */
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *side_reg = ndev->self_mmio + AMD_SIDEINFO_OFFSET;
+       unsigned int info = readl(side_reg);
+
+       if (info & AMD_SIDE_READY)
+               return;
+
+       info |= AMD_SIDE_READY;
+       writel(info, side_reg);
+}
+
+/*
+ * Clear AMD_SIDE_READY in the side-info register if it is set, then read
+ * the register back once.
+ */
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+       void __iomem *side_reg = ndev->self_mmio + AMD_SIDEINFO_OFFSET;
+       unsigned int info = readl(side_reg);
+
+       if (!(info & AMD_SIDE_READY))
+               return;
+
+       info &= ~AMD_SIDE_READY;
+       writel(info, side_reg);
+       readl(side_reg);
+}
+
+/*
+ * Initialise the per-device counts, the scratchpad split and link polling.
+ *
+ * Only the primary and secondary topologies are accepted. The scratchpad
+ * count is halved, with the primary side using offset 0 for itself and 0x20
+ * for the peer, and the secondary side the reverse. The heartbeat work is
+ * initialised and scheduled, db_valid_mask is derived from db_count, and the
+ * cached interrupt mask is written to the hardware.
+ *
+ * Returns 0 on success, -EINVAL for any other topology.
+ */
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+       void __iomem *regs = ndev->self_mmio;
+       enum ntb_topo topo = ndev->ntb.topo;
+
+       ndev->mw_count = AMD_MW_CNT;
+       ndev->spad_count = AMD_SPADS_CNT;
+       ndev->db_count = AMD_DB_CNT;
+
+       if (topo != NTB_TOPO_PRI && topo != NTB_TOPO_SEC) {
+               dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+               return -EINVAL;
+       }
+
+       ndev->spad_count >>= 1;
+       if (topo == NTB_TOPO_PRI) {
+               ndev->self_spad = 0;
+               ndev->peer_spad = 0x20;
+       } else {
+               ndev->self_spad = 0x20;
+               ndev->peer_spad = 0;
+       }
+
+       INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+       schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       /* Mask event interrupts */
+       writel(ndev->int_mask, regs + AMD_INTMASK_OFFSET);
+
+       return 0;
+}
+
+/*
+ * Derive the topology from the side-info register: AMD_SIDE_MASK set means
+ * secondary, clear means primary.
+ */
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+       u32 sideinfo = readl(ndev->self_mmio + AMD_SIDEINFO_OFFSET);
+
+       return (sideinfo & AMD_SIDE_MASK) ? NTB_TOPO_SEC : NTB_TOPO_PRI;
+}
+
+/*
+ * Device-level initialisation: detect the topology, set up the NTB
+ * parameters (which also queues the link heartbeat work) and then the
+ * interrupts.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+       int rc;
+
+       ndev->ntb.topo = amd_get_topo(ndev);
+       dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+               ntb_topo_string(ndev->ntb.topo));
+
+       rc = amd_init_ntb(ndev);
+       if (rc)
+               return rc;
+
+       rc = amd_init_isr(ndev);
+       if (rc) {
+               dev_err(ndev_dev(ndev), "fail to init isr.\n");
+               /*
+                * amd_init_ntb() has already queued the heartbeat work.  The
+                * probe error path frees ndev without calling
+                * amd_deinit_dev(), so the work must be stopped here or it
+                * would run against freed memory.
+                */
+               cancel_delayed_work_sync(&ndev->hb_timer);
+               return rc;
+       }
+
+       /* recompute the mask now that interrupt setup has run */
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+       return 0;
+}
+
+/*
+ * Undo amd_init_dev(): stop the link heartbeat work (waiting for a running
+ * instance to finish) and then tear down the interrupts.
+ */
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+       cancel_delayed_work_sync(&ndev->hb_timer);
+
+       ndev_deinit_isr(ndev);
+}
+
+/*
+ * PCI-level initialisation: enable the device, claim its regions, enable
+ * bus mastering, configure the streaming and consistent DMA masks (64-bit,
+ * falling back to 32-bit) and map BAR 0 as the register window.
+ *
+ * On success self_mmio/peer_mmio are valid and 0 is returned.  On failure
+ * everything acquired so far is released, drvdata is cleared and a negative
+ * errno is returned.
+ */
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+                           struct pci_dev *pdev)
+{
+       int rc;
+
+       pci_set_drvdata(pdev, ndev);
+
+       rc = pci_enable_device(pdev);
+       if (rc)
+               goto err_pci_enable;
+
+       rc = pci_request_regions(pdev, NTB_NAME);
+       if (rc)
+               goto err_pci_regions;
+
+       pci_set_master(pdev);
+
+       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+       }
+
+       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (rc)
+                       goto err_dma_mask;
+               dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+       }
+
+       ndev->self_mmio = pci_iomap(pdev, 0, 0);
+       if (!ndev->self_mmio) {
+               rc = -EIO;
+               goto err_dma_mask;
+       }
+       ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+       return 0;
+
+err_dma_mask:
+       pci_clear_master(pdev);
+       /* the regions were successfully requested above; give them back */
+       pci_release_regions(pdev);
+err_pci_regions:
+       pci_disable_device(pdev);
+err_pci_enable:
+       pci_set_drvdata(pdev, NULL);
+       return rc;
+}
+
+/*
+ * Undo amd_ntb_init_pci(): unmap the register window, disable bus
+ * mastering, release the PCI regions, disable the device and clear drvdata.
+ */
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+       struct pci_dev *pdev = ndev_pdev(ndev);
+
+       pci_iounmap(pdev, ndev->self_mmio);
+
+       pci_clear_master(pdev);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+/*
+ * PCI probe: allocate the device structure on the device's NUMA node, bring
+ * up the PCI and NTB layers, mark this side as ready, take an initial link
+ * reading, create the debugfs entries and register with the NTB core.
+ *
+ * Returns 0 on success or a negative errno; on failure every step already
+ * completed is undone in reverse order.
+ */
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       struct amd_ntb_dev *ndev;
+       int rc, node;
+
+       node = dev_to_node(&pdev->dev);
+
+       ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+       if (!ndev) {
+               rc = -ENOMEM;
+               goto err_ndev;
+       }
+
+       ndev_init_struct(ndev, pdev);
+
+       rc = amd_ntb_init_pci(ndev, pdev);
+       if (rc)
+               goto err_init_pci;
+
+       rc = amd_init_dev(ndev);
+       if (rc)
+               goto err_init_dev;
+
+       /* write side info */
+       amd_init_side_info(ndev);
+
+       amd_poll_link(ndev);
+
+       ndev_init_debugfs(ndev);
+
+       rc = ntb_register_device(&ndev->ntb);
+       if (rc)
+               goto err_register;
+
+       dev_info(&pdev->dev, "NTB device registered.\n");
+
+       return 0;
+
+err_register:
+       ndev_deinit_debugfs(ndev);
+       /* mirror amd_ntb_pci_remove(): do not leave the ready bit set */
+       amd_deinit_side_info(ndev);
+       amd_deinit_dev(ndev);
+err_init_dev:
+       amd_ntb_deinit_pci(ndev);
+err_init_pci:
+       kfree(ndev);
+err_ndev:
+       return rc;
+}
+
+/*
+ * PCI remove: unregister from the NTB core, then undo the probe steps in
+ * reverse order (debugfs, side-ready bit, device, PCI) and free the
+ * device structure.
+ */
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+       struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+       ntb_unregister_device(&ndev->ntb);
+       ndev_deinit_debugfs(ndev);
+       amd_deinit_side_info(ndev);
+       amd_deinit_dev(ndev);
+       amd_ntb_deinit_pci(ndev);
+       kfree(ndev);
+}
+
+/* debugfs file operations: simple_open() plus ndev_debugfs_read() for reads */
+static const struct file_operations amd_ntb_debugfs_info = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = ndev_debugfs_read,
+};
+
+/* PCI IDs handled by this driver; the zeroed entry terminates the table */
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+       {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+       {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+/* PCI driver glue: binds the ID table to the probe and remove callbacks */
+static struct pci_driver amd_ntb_pci_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = amd_ntb_pci_tbl,
+       .probe          = amd_ntb_pci_probe,
+       .remove         = amd_ntb_pci_remove,
+};
+
+/*
+ * Module init: announce the driver, create the top-level debugfs directory
+ * when debugfs is available, and register the PCI driver.
+ *
+ * Returns the result of pci_register_driver().  If registration fails the
+ * debugfs directory is removed again, because the module exit handler will
+ * not run for a module whose init failed.
+ */
+static int __init amd_ntb_pci_driver_init(void)
+{
+       int rc;
+
+       pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+       if (debugfs_initialized())
+               debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+       rc = pci_register_driver(&amd_ntb_pci_driver);
+       if (rc) {
+               debugfs_remove_recursive(debugfs_dir);
+               debugfs_dir = NULL;
+       }
+
+       return rc;
+}
+module_init(amd_ntb_pci_driver_init);
+
+/*
+ * Module exit: unregister the PCI driver first, then remove the driver's
+ * debugfs directory and everything below it.
+ */
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+       pci_unregister_driver(&amd_ntb_pci_driver);
+       debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644 (file)
index 0000000..2eac3cd
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+/* PCI device ID listed in the driver's ID table */
+#define PCI_DEVICE_ID_AMD_NTB  0x145B
+/* delay, in jiffies, between runs of the link heartbeat work */
+#define AMD_LINK_HB_TIMEOUT    msecs_to_jiffies(1000)
+/* PCI config-space offset of the dword stored in amd_ntb_dev.lnk_sta */
+#define AMD_LINK_STATUS_OFFSET 0x68
+/* link status bit masks, followed by helpers that extract each field */
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+#define NTB_LNK_STA_ACTIVE(x)  (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)   (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x)   (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+/*
+ * Fallback 64-bit MMIO read for platforms without readq(): two 32-bit
+ * reads, low word first, combined into one value.  The two accesses are
+ * separate, so the result is not a single atomic read.
+ */
+static inline u64 _read64(void __iomem *mmio)
+{
+       u64 lo = readl(mmio);
+       u64 hi = readl(mmio + sizeof(u32));
+
+       return (hi << 32) | lo;
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+/*
+ * Fallback 64-bit MMIO write for platforms without writeq(): the low
+ * 32 bits are written first, then the high 32 bits at the next word.
+ */
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+       u32 lo = (u32)val;
+       u32 hi = (u32)(val >> 32);
+
+       writel(lo, mmio);
+       writel(hi, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+/*
+ * Hardware constants for the AMD NTB: capability counts, register offsets
+ * (added to the mapped register base) and the bit definitions that belong
+ * to some of those registers.
+ */
+enum {
+       /* AMD NTB capabilities */
+       AMD_MW_CNT              = 3,
+       AMD_DB_CNT              = 16,
+       AMD_MSIX_VECTOR_CNT     = 24,
+       AMD_SPADS_CNT           = 16,
+
+       /* AMD NTB register offsets */
+       AMD_CNTL_OFFSET         = 0x200,
+
+       /* NTB control register bits */
+       PMM_REG_CTL             = BIT(21),
+       SMM_REG_CTL             = BIT(20),
+       SMM_REG_ACC_PATH        = BIT(18),
+       PMM_REG_ACC_PATH        = BIT(17),
+       NTB_CLK_EN              = BIT(16),
+
+       AMD_STA_OFFSET          = 0x204,
+       AMD_PGSLV_OFFSET        = 0x208,
+       AMD_SPAD_MUX_OFFSET     = 0x20C,
+       AMD_SPAD_OFFSET         = 0x210,
+       AMD_RSMU_HCID           = 0x250,
+       AMD_RSMU_SIID           = 0x254,
+       AMD_PSION_OFFSET        = 0x300,
+       AMD_SSION_OFFSET        = 0x330,
+       AMD_MMINDEX_OFFSET      = 0x400,
+       AMD_MMDATA_OFFSET       = 0x404,
+       AMD_SIDEINFO_OFFSET     = 0x408,
+
+       /* side-info register bits: which side we are, and the ready flag */
+       AMD_SIDE_MASK           = BIT(0),
+       AMD_SIDE_READY          = BIT(1),
+
+       /* limit registers */
+       AMD_ROMBARLMT_OFFSET    = 0x410,
+       AMD_BAR1LMT_OFFSET      = 0x414,
+       AMD_BAR23LMT_OFFSET     = 0x418,
+       AMD_BAR45LMT_OFFSET     = 0x420,
+       /* xlat addresses */
+       AMD_POMBARXLAT_OFFSET   = 0x428,
+       AMD_BAR1XLAT_OFFSET     = 0x430,
+       AMD_BAR23XLAT_OFFSET    = 0x438,
+       AMD_BAR45XLAT_OFFSET    = 0x440,
+       /* doorbell and interrupt */
+       AMD_DBFM_OFFSET         = 0x450,
+       AMD_DBREQ_OFFSET        = 0x454,
+       AMD_MIRRDBSTAT_OFFSET   = 0x458,
+       AMD_DBMASK_OFFSET       = 0x45C,
+       AMD_DBSTAT_OFFSET       = 0x460,
+       AMD_INTMASK_OFFSET      = 0x470,
+       AMD_INTSTAT_OFFSET      = 0x474,
+
+       /* event types */
+       AMD_PEER_FLUSH_EVENT    = BIT(0),
+       AMD_PEER_RESET_EVENT    = BIT(1),
+       AMD_PEER_D3_EVENT       = BIT(2),
+       AMD_PEER_PMETO_EVENT    = BIT(3),
+       AMD_PEER_D0_EVENT       = BIT(4),
+       /* all of the event bits above combined */
+       AMD_EVENT_INTMASK       = (AMD_PEER_FLUSH_EVENT |
+                               AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+                               AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+       AMD_PMESTAT_OFFSET      = 0x480,
+       AMD_PMSGTRIG_OFFSET     = 0x490,
+       AMD_LTRLATENCY_OFFSET   = 0x494,
+       AMD_FLUSHTRIG_OFFSET    = 0x498,
+
+       /* SMU registers */
+       AMD_SMUACK_OFFSET       = 0x4A0,
+       AMD_SINRST_OFFSET       = 0x4A4,
+       AMD_RSPNUM_OFFSET       = 0x4A8,
+       AMD_SMU_SPADMUTEX       = 0x4B0,
+       AMD_SMU_SPADOFFSET      = 0x4B4,
+
+       /* added to self_mmio to obtain peer_mmio */
+       AMD_PEER_OFFSET         = 0x400,
+};
+
+struct amd_ntb_dev;
+
+/* Per-vector context: back-pointer to the device plus a vector number. */
+struct amd_ntb_vec {
+       struct amd_ntb_dev      *ndev;
+       int                     num;
+};
+
+/*
+ * Driver state for one AMD NTB device.  The embedded struct ntb_dev is what
+ * gets registered with the NTB core; ntb_ndev() converts back from it.
+ */
+struct amd_ntb_dev {
+       struct ntb_dev ntb;
+
+       u32 ntb_side;
+       /* dword read from PCI config space at AMD_LINK_STATUS_OFFSET */
+       u32 lnk_sta;
+       /* last link-active bit seen by amd_poll_link(), used to spot changes */
+       u32 cntl_sta;
+       u32 peer_sta;
+
+       /* resource counts, filled in by amd_init_ntb() */
+       unsigned char mw_count;
+       unsigned char spad_count;
+       unsigned char db_count;
+       unsigned char msix_vec_count;
+
+       /* one bit set for each of the db_count doorbells */
+       u64 db_valid_mask;
+       u64 db_mask;
+       /* value written to the AMD_INTMASK_OFFSET register */
+       u32 int_mask;
+
+       struct msix_entry *msix;
+       struct amd_ntb_vec *vec;
+
+       /* synchronize rmw access of db_mask and hw reg */
+       spinlock_t db_mask_lock;
+
+       /* mapping of BAR 0, and the same mapping offset by AMD_PEER_OFFSET */
+       void __iomem *self_mmio;
+       void __iomem *peer_mmio;
+       /* scratchpad offsets: 0 or 0x20, chosen by topology */
+       unsigned int self_spad;
+       unsigned int peer_spad;
+
+       /* delayed work that runs amd_link_hb() */
+       struct delayed_work hb_timer;
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_info;
+};
+
+/* accessors from the driver state to its PCI device, name and struct device */
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+/* recover the driver state from an embedded ntb_dev or heartbeat work item */
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
index a198f82..40d04ef 100644 (file)
@@ -875,7 +875,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
        limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
 
        if (bar < 4 || !ndev->bar4_split) {
-               base = ioread64(mmio + base_reg);
+               base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
@@ -906,7 +906,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
                if ((addr + size) & (~0ull << 32))
                        return -EINVAL;
 
-               base = ioread32(mmio + base_reg);
+               base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
 
                /* Set the limit if supported, if size is not mw_size */
                if (limit_reg && size != mw_size)
index 2eb4add..3ec149c 100644 (file)
 #define NTB_UNSAFE_DB                  BIT_ULL(0)
 #define NTB_UNSAFE_SPAD                        BIT_ULL(1)
 
+#define NTB_BAR_MASK_64                        ~(0xfull)
+#define NTB_BAR_MASK_32                        ~(0xfu)
+
 struct intel_ntb_dev;
 
 struct intel_ntb_reg {
@@ -334,7 +337,8 @@ struct intel_ntb_dev {
 #define ndev_pdev(ndev) ((ndev)->ntb.pdev)
 #define ndev_name(ndev) pci_name(ndev_pdev(ndev))
 #define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+                                    hb_timer.work)
 
 #endif
index 60654d5..ec4775f 100644 (file)
@@ -171,12 +171,14 @@ struct ntb_transport_qp {
        u64 rx_err_ver;
        u64 rx_memcpy;
        u64 rx_async;
+       u64 dma_rx_prep_err;
        u64 tx_bytes;
        u64 tx_pkts;
        u64 tx_ring_full;
        u64 tx_err_no_buf;
        u64 tx_memcpy;
        u64 tx_async;
+       u64 dma_tx_prep_err;
 };
 
 struct ntb_transport_mw {
@@ -249,6 +251,8 @@ enum {
 #define QP_TO_MW(nt, qp)       ((qp) % nt->mw_count)
 #define NTB_QP_DEF_NUM_ENTRIES 100
 #define NTB_LINK_DOWN_TIMEOUT  10
+#define DMA_RETRIES            20
+#define DMA_OUT_RESOURCE_TO    50
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
@@ -501,6 +505,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "free tx - \t%u\n",
                               ntb_transport_tx_free_entry(qp));
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA tx prep err - \t%llu\n",
+                              qp->dma_tx_prep_err);
+       out_offset += snprintf(buf + out_offset, out_count - out_offset,
+                              "DMA rx prep err - \t%llu\n",
+                              qp->dma_rx_prep_err);
 
        out_offset += snprintf(buf + out_offset, out_count - out_offset,
                               "\n");
@@ -726,6 +736,8 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
        qp->tx_err_no_buf = 0;
        qp->tx_memcpy = 0;
        qp->tx_async = 0;
+       qp->dma_tx_prep_err = 0;
+       qp->dma_rx_prep_err = 0;
 }
 
 static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1228,6 +1240,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
        struct dmaengine_unmap_data *unmap;
        dma_cookie_t cookie;
        void *buf = entry->buf;
+       int retries = 0;
 
        len = entry->len;
 
@@ -1263,11 +1276,21 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
 
        unmap->from_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
-                                            unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+                                                    unmap->addr[0], len,
+                                                    DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_rx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_rx_copy_callback;
        txd->callback_param = entry;
@@ -1460,6 +1483,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
        void __iomem *offset;
        size_t len = entry->len;
        void *buf = entry->buf;
+       int retries = 0;
 
        offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
        hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1494,10 +1518,20 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
 
        unmap->to_cnt = 1;
 
-       txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
-                                            DMA_PREP_INTERRUPT);
-       if (!txd)
+       for (retries = 0; retries < DMA_RETRIES; retries++) {
+               txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+                                                    len, DMA_PREP_INTERRUPT);
+               if (txd)
+                       break;
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(DMA_OUT_RESOURCE_TO);
+       }
+
+       if (!txd) {
+               qp->dma_tx_prep_err++;
                goto err_get_unmap;
+       }
 
        txd->callback = ntb_tx_copy_callback;
        txd->callback_param = entry;
@@ -1532,7 +1566,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
 
        if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
                if (qp->tx_handler)
-                       qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+                       qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
 
                ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
                             &qp->tx_free_q);
index 01852f9..a5d0eda 100644 (file)
@@ -17,3 +17,11 @@ config NTB_TOOL
         functioning at a basic level.
 
         If unsure, say N.
+
+config NTB_PERF
+       tristate "NTB RAW Perf Measuring Tool"
+       help
+        This is a tool to measure raw NTB performance by transferring data
+        to and from the window without additional software interaction.
+
+        If unsure, say N.
index 0ea32a3..9e77e0b 100644 (file)
@@ -1,2 +1,3 @@
 obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
 obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644 (file)
index 0000000..c8a37ba
--- /dev/null
@@ -0,0 +1,748 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *   PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME            "ntb_perf"
+#define DRIVER_DESCRIPTION     "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE         "Dual BSD/GPL"
+#define DRIVER_VERSION         "1.0"
+#define DRIVER_AUTHOR          "Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT 10
+#define PERF_VERSION           0xffff0001
+/* number of per-thread contexts held in struct perf_ctx */
+#define MAX_THREADS            32
+/* size of each source buffer; also the upper bound on the segment size */
+#define MAX_TEST_SIZE          SZ_1M
+/* number of source buffers allocated per thread */
+#define MAX_SRCS               32
+/* sleep, in jiffies, between attempts to get a DMA descriptor */
+#define DMA_OUT_RESOURCE_TO    50
+/* maximum number of attempts to get a DMA descriptor */
+#define DMA_RETRIES            20
+#define SZ_4G                  (1ULL << 32)
+#define MAX_SEG_ORDER          20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+/* size of one copy segment is 2^seg_order bytes */
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
+
+/* total amount of data moved per run is 2^run_order bytes */
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
+
+/* selects the DMA engine path in perf_copy() instead of memcpy_toio() */
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+/* Bookkeeping for the memory window used by the test. */
+struct perf_mw {
+       /* physical base of the window; DMA destinations are computed from it */
+       phys_addr_t     phys_addr;
+       /* window size; the copy destination wraps around after this much */
+       resource_size_t phys_size;
+       resource_size_t xlat_align;
+       resource_size_t xlat_align_size;
+       /* CPU mapping of the window; base of the copy destination */
+       void __iomem    *vbase;
+       size_t          xlat_size;
+       /* size, CPU address and DMA address of the coherent buffer */
+       size_t          buf_size;
+       void            *virt_addr;
+       dma_addr_t      dma_addr;
+};
+
+struct perf_ctx;
+
+/* Per-thread test context. */
+struct pthr_ctx {
+       struct task_struct      *thread;
+       struct perf_ctx         *perf;
+       /* DMA copies submitted but not yet completed */
+       atomic_t                dma_sync;
+       /* DMA channel used by this thread, NULL until one is acquired */
+       struct dma_chan         *dma_chan;
+       /* number of times no DMA descriptor could be obtained */
+       int                     dma_prep_err;
+       /* index into srcs[] of the buffer to use for the next run */
+       int                     src_idx;
+       /* source buffers, each MAX_TEST_SIZE bytes */
+       void                    *srcs[MAX_SRCS];
+};
+
+/* Per-device state of the perf tool. */
+struct perf_ctx {
+       struct ntb_dev          *ntb;
+       spinlock_t              db_lock;
+       struct perf_mw          mw;
+       bool                    link_is_up;
+       /* scheduled by perf_link_event() when the link is not up */
+       struct work_struct      link_cleanup;
+       /* scheduled by perf_link_event() when the link is up */
+       struct delayed_work     link_work;
+       struct dentry           *debugfs_node_dir;
+       struct dentry           *debugfs_run;
+       struct dentry           *debugfs_threads;
+       /* number of threads that must check in before copying starts */
+       u8                      perf_threads;
+       bool                    run;
+       struct pthr_ctx         pthr_ctx[MAX_THREADS];
+       /* count of threads that have reached the start barrier */
+       atomic_t                tsync;
+};
+
+/*
+ * Index constants; judging by the names these are scratchpad slots, with
+ * MAX_SPAD as the count -- TODO confirm against the users further down.
+ */
+enum {
+       VERSION = 0,
+       MW_SZ_HIGH,
+       MW_SZ_LOW,
+       SPAD_MSG,
+       SPAD_ACK,
+       MAX_SPAD
+};
+
+/*
+ * NTB link-event callback: when the link reads as up, queue link_work with
+ * a two-second delay; otherwise queue the link cleanup work.
+ */
+static void perf_link_event(void *ctx)
+{
+       struct perf_ctx *perf = ctx;
+       int up = ntb_link_is_up(perf->ntb, NULL, NULL);
+
+       if (up != 1) {
+               schedule_work(&perf->link_cleanup);
+               return;
+       }
+
+       schedule_delayed_work(&perf->link_work, 2*HZ);
+}
+
+/*
+ * NTB doorbell callback: reads the vector's doorbell mask and the current
+ * doorbell bits and only logs them at debug level.
+ */
+static void perf_db_event(void *ctx, int vec)
+{
+       struct perf_ctx *perf = ctx;
+       struct ntb_dev *ntb = perf->ntb;
+       u64 db_mask = ntb_db_vector_mask(ntb, vec);
+       u64 db_bits = ntb_db_read(ntb);
+
+       dev_dbg(&ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+               vec, db_mask, db_bits);
+}
+
+/* NTB context callbacks provided by the perf tool */
+static const struct ntb_ctx_ops perf_ops = {
+       .link_event = perf_link_event,
+       .db_event = perf_db_event,
+};
+
+/* DMA completion callback: one fewer copy outstanding for this thread. */
+static void perf_copy_callback(void *data)
+{
+       struct pthr_ctx *thread_ctx = data;
+
+       atomic_dec(&thread_ctx->dma_sync);
+}
+
+/*
+ * Copy @size bytes from @src into the memory window at @dst.
+ *
+ * Without use_dma this is a plain memcpy_toio().  With use_dma a memcpy
+ * descriptor is prepared on the thread's DMA channel (retrying up to
+ * DMA_RETRIES times when none is available), submitted, and counted in
+ * pctx->dma_sync until perf_copy_callback() runs.
+ *
+ * Returns @size when the copy was done or queued; -EINVAL when there is no
+ * DMA channel, -ENODEV when the DMA alignment check fails, -ENOMEM when no
+ * unmap data could be allocated; and 0 when mapping the source, preparing
+ * the descriptor or submitting it fails.
+ */
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+                        char *src, size_t size)
+{
+       struct perf_ctx *perf = pctx->perf;
+       struct dma_async_tx_descriptor *txd;
+       struct dma_chan *chan = pctx->dma_chan;
+       struct dma_device *device;
+       struct dmaengine_unmap_data *unmap;
+       dma_cookie_t cookie;
+       size_t src_off, dst_off;
+       struct perf_mw *mw = &perf->mw;
+       u64 vbase, dst_vaddr;
+       dma_addr_t dst_phys;
+       int retries = 0;
+
+       if (!use_dma) {
+               memcpy_toio(dst, src, size);
+               return size;
+       }
+
+       if (!chan) {
+               dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+               return -EINVAL;
+       }
+
+       device = chan->device;
+       /* offsets of source and destination within their pages */
+       src_off = (size_t)src & ~PAGE_MASK;
+       dst_off = (size_t)dst & ~PAGE_MASK;
+
+       if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+               return -ENODEV;
+
+       /*
+        * Translate the destination's offset within the mapped window into
+        * the matching physical address for the DMA engine.
+        */
+       vbase = (u64)(u64 *)mw->vbase;
+       dst_vaddr = (u64)(u64 *)dst;
+       dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+       unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+       if (!unmap)
+               return -ENOMEM;
+
+       unmap->len = size;
+       unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+                                     src_off, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dev, unmap->addr[0]))
+               goto err_get_unmap;
+
+       unmap->to_cnt = 1;
+
+       /* no descriptor available: sleep and retry a bounded number of times */
+       do {
+               txd = device->device_prep_dma_memcpy(chan, dst_phys,
+                                                    unmap->addr[0],
+                                                    size, DMA_PREP_INTERRUPT);
+               if (!txd) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(DMA_OUT_RESOURCE_TO);
+               }
+       } while (!txd && (++retries < DMA_RETRIES));
+
+       if (!txd) {
+               pctx->dma_prep_err++;
+               goto err_get_unmap;
+       }
+
+       txd->callback = perf_copy_callback;
+       txd->callback_param = pctx;
+       dma_set_unmap(txd, unmap);
+
+       cookie = dmaengine_submit(txd);
+       if (dma_submit_error(cookie))
+               goto err_set_unmap;
+
+       atomic_inc(&pctx->dma_sync);
+       dma_async_issue_pending(chan);
+
+       return size;
+
+       /*
+        * NOTE(review): the extra put after dma_set_unmap() presumably drops
+        * the reference taken on behalf of the descriptor -- confirm against
+        * the dmaengine unmap API.
+        */
+err_set_unmap:
+       dmaengine_unmap_put(unmap);
+err_get_unmap:
+       dmaengine_unmap_put(unmap);
+       return 0;
+}
+
+/*
+ * Copy @total bytes from @src into the window starting at @dst, in pieces
+ * of @buf_size bytes, wrapping back to @dst after every @win_size bytes,
+ * then log the byte count, elapsed time and throughput.
+ *
+ * In DMA mode the function waits for every submitted descriptor to
+ * complete before returning, including on the error path, so that the
+ * caller may free @src afterwards.
+ *
+ * Returns 0 on success, or the negative errno from perf_copy() if a copy
+ * fails outright.
+ */
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+                         u64 buf_size, u64 win_size, u64 total)
+{
+       int chunks, total_chunks, i;
+       int copied_chunks = 0;
+       int rc = 0;
+       u64 copied = 0;
+       ssize_t result;
+       char *tmp = dst;
+       u64 perf, diff_us;
+       ktime_t kstart, kstop, kdiff;
+
+       chunks = div64_u64(win_size, buf_size);
+       total_chunks = div64_u64(total, buf_size);
+       kstart = ktime_get();
+
+       for (i = 0; i < total_chunks; i++) {
+               result = perf_copy(pctx, tmp, src, buf_size);
+               if (result < 0) {
+                       /* do not fold an errno into the byte counter */
+                       rc = result;
+                       break;
+               }
+               copied += result;
+               copied_chunks++;
+               if (copied_chunks == chunks) {
+                       tmp = dst;
+                       copied_chunks = 0;
+               } else
+                       tmp += buf_size;
+
+               /* Probably should schedule every 4GB to prevent soft hang. */
+               if (((copied % SZ_4G) == 0) && !use_dma) {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       schedule_timeout(1);
+               }
+       }
+
+       if (use_dma) {
+               if (!rc)
+                       pr_info("%s: All DMA descriptors submitted\n",
+                               current->comm);
+               /* wait for outstanding copies even when bailing out */
+               while (atomic_read(&pctx->dma_sync) != 0)
+                       msleep(20);
+       }
+
+       if (rc < 0)
+               return rc;
+
+       kstop = ktime_get();
+       kdiff = ktime_sub(kstop, kstart);
+       diff_us = ktime_to_us(kdiff);
+
+       pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+       pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+       /* a run shorter than one microsecond must not divide by zero */
+       perf = diff_us ? div64_u64(copied, diff_us) : 0;
+
+       pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+       return 0;
+}
+
+/*
+ * dma_request_channel() filter: accept only channels whose device sits on
+ * the NUMA node number passed in through @node.
+ */
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+       int wanted = (int)(unsigned long)node;
+       int chan_node = dev_to_node(&chan->dev->device);
+
+       return chan_node == wanted;
+}
+
+/*
+ * Body of one test thread.
+ *
+ * Acquires a DMA channel on the device's NUMA node when use_dma is set and
+ * the thread has none yet, allocates MAX_SRCS source buffers, waits until
+ * all perf_threads threads have checked in, runs perf_move_data() with one
+ * of the source buffers and finally frees the buffers.
+ *
+ * Returns 0 on success, -ENODEV when no DMA channel is available, -ENOMEM
+ * when a source buffer cannot be allocated and -ENXIO when the data move
+ * fails.
+ */
+static int ntb_perf_thread(void *data)
+{
+       struct pthr_ctx *pctx = data;
+       struct perf_ctx *perf = pctx->perf;
+       struct pci_dev *pdev = perf->ntb->pdev;
+       struct perf_mw *mw = &perf->mw;
+       char *dst;
+       u64 win_size, buf_size, total;
+       void *src;
+       int rc, node, i;
+       /* set only when this invocation acquired the channel itself */
+       struct dma_chan *dma_chan = NULL;
+
+       pr_info("kthread %s starting...\n", current->comm);
+
+       node = dev_to_node(&pdev->dev);
+
+       if (use_dma && !pctx->dma_chan) {
+               dma_cap_mask_t dma_mask;
+
+               dma_cap_zero(dma_mask);
+               dma_cap_set(DMA_MEMCPY, dma_mask);
+               dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+                                              (void *)(unsigned long)node);
+               if (!dma_chan) {
+                       pr_warn("%s: cannot acquire DMA channel, quitting\n",
+                               current->comm);
+                       return -ENODEV;
+               }
+               pctx->dma_chan = dma_chan;
+       }
+
+       for (i = 0; i < MAX_SRCS; i++) {
+               pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+               if (!pctx->srcs[i]) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+       }
+
+       win_size = mw->phys_size;
+       buf_size = 1ULL << seg_order;
+       total = 1ULL << run_order;
+
+       /* a segment may not exceed the size of a source buffer */
+       if (buf_size > MAX_TEST_SIZE)
+               buf_size = MAX_TEST_SIZE;
+
+       dst = (char *)mw->vbase;
+
+       /* start barrier: spin until every thread has checked in */
+       atomic_inc(&perf->tsync);
+       while (atomic_read(&perf->tsync) != perf->perf_threads)
+               schedule();
+
+       /* take the current source buffer and advance the index, wrapping */
+       src = pctx->srcs[pctx->src_idx];
+       pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+       rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+       atomic_dec(&perf->tsync);
+
+       if (rc < 0) {
+               pr_err("%s: failed\n", current->comm);
+               rc = -ENXIO;
+               goto err;
+       }
+
+       /* success: free the buffers; pctx->dma_chan is left in place */
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       return 0;
+
+err:
+       for (i = 0; i < MAX_SRCS; i++) {
+               kfree(pctx->srcs[i]);
+               pctx->srcs[i] = NULL;
+       }
+
+       /* release the channel only if this invocation acquired it */
+       if (dma_chan) {
+               dma_release_channel(dma_chan);
+               pctx->dma_chan = NULL;
+       }
+
+       return rc;
+}
+
+/*
+ * Release the local memory-window buffer, if one is allocated: clear the
+ * translation of window 0, free the coherent DMA buffer and reset the
+ * size/address bookkeeping in perf->mw.  Does nothing when no buffer is
+ * currently held.
+ */
+static void perf_free_mw(struct perf_ctx *perf)
+{
+       struct ntb_dev *ntb = perf->ntb;
+       struct perf_mw *mw = &perf->mw;
+
+       if (mw->virt_addr) {
+               ntb_mw_clear_trans(ntb, 0);
+               dma_free_coherent(&ntb->pdev->dev, mw->buf_size,
+                                 mw->virt_addr, mw->dma_addr);
+               mw->virt_addr = NULL;
+               mw->buf_size = 0;
+               mw->xlat_size = 0;
+       }
+}
+
+/*
+ * (Re)allocate the coherent buffer that backs memory window 0 and program
+ * the window translation to point at it.
+ *
+ * @size is rounded up to mw->xlat_align_size for the translation and to
+ * mw->xlat_align for the allocation.  If the resulting translation size
+ * matches the one already configured, nothing is changed.
+ *
+ * Returns 0 on success (or when nothing had to change), -EINVAL when @size
+ * is zero, -ENOMEM when the buffer cannot be allocated and -EIO when the
+ * translation cannot be programmed.  On failure no buffer is left
+ * allocated and the recorded sizes are zero.
+ */
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+       struct perf_mw *mw = &perf->mw;
+       size_t xlat_size, buf_size;
+       int rc;
+
+       if (!size)
+               return -EINVAL;
+
+       xlat_size = round_up(size, mw->xlat_align_size);
+       buf_size = round_up(size, mw->xlat_align);
+
+       if (mw->xlat_size == xlat_size)
+               return 0;
+
+       if (mw->buf_size)
+               perf_free_mw(perf);
+
+       mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+                                          &mw->dma_addr, GFP_KERNEL);
+       /* Report the failure so the caller does not mark the link as up. */
+       if (!mw->virt_addr)
+               return -ENOMEM;
+
+       /* Record the sizes only once the buffer really exists. */
+       mw->xlat_size = xlat_size;
+       mw->buf_size = buf_size;
+
+       /*
+        * Point window 0 at the new buffer; perf_free_mw() is the matching
+        * teardown and clears this translation again.
+        */
+       rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+       if (rc) {
+               dev_err(&perf->ntb->pdev->dev,
+                       "Unable to set mw0 translation\n");
+               perf_free_mw(perf);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/*
+ * Delayed work that performs the scratchpad handshake with the peer.
+ *
+ * The local window size and PERF_VERSION are written to the peer's
+ * scratchpads; the values the peer wrote to ours are then read back.  When
+ * the peer's version matches and perf_set_mw() succeeds for the size it
+ * advertised, the link is marked up.  Otherwise the work re-arms itself
+ * after PERF_LINK_DOWN_TIMEOUT ms for as long as ntb_link_is_up() returns 1.
+ */
+static void perf_link_work(struct work_struct *work)
+{
+       struct perf_ctx *perf =
+               container_of(work, struct perf_ctx, link_work.work);
+       struct ntb_dev *ndev = perf->ntb;
+       struct device *dev = &ndev->pdev->dev;
+       u64 local_size = perf->mw.phys_size;
+       u64 peer_size;
+       u32 peer_ver, hi, lo;
+
+       dev_dbg(dev, "%s called\n", __func__);
+
+       /* Publish our side of the handshake. */
+       ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(local_size));
+       ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(local_size));
+       ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+       /* now read what peer wrote */
+       peer_ver = ntb_spad_read(ndev, VERSION);
+       if (peer_ver != PERF_VERSION) {
+               dev_dbg(dev, "Remote version = %#x\n", peer_ver);
+               goto retry;
+       }
+
+       hi = ntb_spad_read(ndev, MW_SZ_HIGH);
+       lo = ntb_spad_read(ndev, MW_SZ_LOW);
+       peer_size = ((u64)hi << 32) | lo;
+
+       dev_dbg(dev, "Remote MW size = %#llx\n", peer_size);
+
+       if (!perf_set_mw(perf, peer_size)) {
+               perf->link_is_up = true;
+               return;
+       }
+
+       perf_free_mw(perf);
+
+retry:
+       if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+               schedule_delayed_work(&perf->link_work,
+                                     msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+/*
+ * Work item run for link cleanup: if the link never reached the "up"
+ * state, cancel the pending handshake work and wait for it to finish.
+ */
+static void perf_link_cleanup(struct work_struct *work)
+{
+       struct perf_ctx *perf;
+
+       perf = container_of(work, struct perf_ctx, link_cleanup);
+
+       dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+       if (perf->link_is_up)
+               return;
+
+       cancel_delayed_work_sync(&perf->link_work);
+}
+
+/*
+ * Query the range and alignment constraints of memory window 0 and map the
+ * window write-combined into kernel address space (perf->mw.vbase).
+ *
+ * Returns 0 on success, the error from ntb_mw_get_range() if the query
+ * fails, or -ENOMEM if the mapping cannot be created.
+ */
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+       struct perf_mw *mw = &perf->mw;
+       int rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+                                 &mw->xlat_align, &mw->xlat_align_size);
+
+       if (!rc) {
+               mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+               if (!mw->vbase)
+                       rc = -ENOMEM;
+       }
+
+       return rc;
+}
+
+/*
+ * Read handler for the debugfs "run" file: reports perf->run as a decimal
+ * number followed by a newline.
+ *
+ * Returns the number of bytes copied to @ubuf (0 at end of file or when
+ * the file carries no context), or a negative errno from
+ * simple_read_from_buffer().
+ */
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+                               size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       /*
+        * Large enough for any "%d\n" rendering; a stack buffer avoids the
+        * heap allocation, which could fail and was never checked.
+        */
+       char buf[16];
+       ssize_t len;
+
+       if (!perf)
+               return 0;
+
+       len = snprintf(buf, sizeof(buf), "%d\n", perf->run);
+
+       return simple_read_from_buffer(ubuf, count, offp, buf, len);
+}
+
+/*
+ * Write handler for the debugfs "run" file.  The written data itself is
+ * ignored; every write toggles the test:
+ *
+ *  - if a run is in progress (perf->run set and perf->tsync non-zero) the
+ *    worker threads are stopped;
+ *  - otherwise perf->perf_threads worker threads are created on the NTB
+ *    device's node and started, after clamping perf_threads, seg_order and
+ *    run_order to their limits.
+ *
+ * Returns @count on success, -ENOLINK while the link is not up, -EINVAL
+ * when zero threads are configured and -ENXIO when a worker thread cannot
+ * be created.
+ */
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+                                size_t count, loff_t *offp)
+{
+       struct perf_ctx *perf = filp->private_data;
+       int node, i;
+
+       /*
+        * Fail with an error instead of returning 0: a write that consumes
+        * zero bytes makes common userspace writers retry forever.
+        */
+       if (!perf->link_is_up)
+               return -ENOLINK;
+
+       if (perf->perf_threads == 0)
+               return -EINVAL;
+
+       /* No thread is inside the test any more: the previous run is over. */
+       if (atomic_read(&perf->tsync) == 0)
+               perf->run = false;
+
+       if (perf->run) {
+               /* lets stop the threads */
+               perf->run = false;
+               for (i = 0; i < MAX_THREADS; i++) {
+                       if (!perf->pthr_ctx[i].thread)
+                               break;
+
+                       kthread_stop(perf->pthr_ctx[i].thread);
+                       perf->pthr_ctx[i].thread = NULL;
+               }
+
+               return count;
+       }
+
+       perf->run = true;
+
+       if (perf->perf_threads > MAX_THREADS) {
+               perf->perf_threads = MAX_THREADS;
+               pr_info("Reset total threads to: %u\n", MAX_THREADS);
+       }
+
+       /* no greater than 1M */
+       if (seg_order > MAX_SEG_ORDER) {
+               seg_order = MAX_SEG_ORDER;
+               pr_info("Fix seg_order to %u\n", seg_order);
+       }
+
+       if (run_order < seg_order) {
+               run_order = seg_order;
+               pr_info("Fix run_order to %u\n", run_order);
+       }
+
+       node = dev_to_node(&perf->ntb->pdev->dev);
+
+       /*
+        * Create every thread before waking any of them.  A thread that was
+        * created but never woken exits from kthread_stop() without running
+        * ntb_perf_thread(), so a partial set can be unwound safely; once
+        * running, the threads wait for each other and could not be stopped
+        * if one of them were missing.
+        */
+       for (i = 0; i < perf->perf_threads; i++) {
+               struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+               struct task_struct *thread;
+
+               atomic_set(&pctx->dma_sync, 0);
+               pctx->perf = perf;
+               /* kthread_create_on_node() reports failure as ERR_PTR(). */
+               thread = kthread_create_on_node(ntb_perf_thread, pctx, node,
+                                               "ntb_perf %d", i);
+               if (IS_ERR(thread)) {
+                       pctx->thread = NULL;
+                       /* Unwind the threads created so far. */
+                       while (--i >= 0) {
+                               kthread_stop(perf->pthr_ctx[i].thread);
+                               perf->pthr_ctx[i].thread = NULL;
+                       }
+                       perf->run = false;
+                       return -ENXIO;
+               }
+               pctx->thread = thread;
+       }
+
+       /* launch kernel thread */
+       for (i = 0; i < perf->perf_threads; i++)
+               wake_up_process(perf->pthr_ctx[i].thread);
+
+       return count;
+}
+
+/*
+ * File operations of the per-device debugfs "run" file: reading reports
+ * the current run flag, writing starts or stops a test run.  simple_open()
+ * copies the inode's private pointer into file->private_data, which the
+ * read/write handlers use as their perf_ctx.
+ */
+static const struct file_operations ntb_perf_debugfs_run = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = debugfs_run_read,
+       .write = debugfs_run_write,
+};
+
+/*
+ * Create the debugfs hierarchy for one device:
+ *
+ *   <KBUILD_MODNAME>/<pci name>/run      - start/stop a test, see state
+ *   <KBUILD_MODNAME>/<pci name>/threads  - number of worker threads (u8)
+ *
+ * The top-level module directory is created on first use and shared.
+ *
+ * Returns 0 on success or -ENODEV when debugfs is unavailable or an entry
+ * cannot be created.  On failure the per-device directory is removed again
+ * so that no file referring to @perf outlives a failed setup.
+ */
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+       struct pci_dev *pdev = perf->ntb->pdev;
+
+       if (!debugfs_initialized())
+               return -ENODEV;
+
+       if (!perf_debugfs_dir) {
+               perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+               if (!perf_debugfs_dir)
+                       return -ENODEV;
+       }
+
+       perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+                                                   perf_debugfs_dir);
+       if (!perf->debugfs_node_dir)
+               return -ENODEV;
+
+       perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+                                               perf->debugfs_node_dir, perf,
+                                               &ntb_perf_debugfs_run);
+       if (!perf->debugfs_run)
+               goto err_remove;
+
+       perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+                                                 perf->debugfs_node_dir,
+                                                 &perf->perf_threads);
+       if (!perf->debugfs_threads)
+               goto err_remove;
+
+       return 0;
+
+err_remove:
+       /* Drop the partially populated directory and everything below it. */
+       debugfs_remove_recursive(perf->debugfs_node_dir);
+       perf->debugfs_node_dir = NULL;
+       perf->debugfs_run = NULL;
+       perf->debugfs_threads = NULL;
+
+       return -ENODEV;
+}
+
+/*
+ * NTB client probe: allocate and initialise the per-device perf context,
+ * map memory window 0, register the context with the NTB device, enable
+ * the link and create the debugfs entries.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * so far is undone and the context is freed.
+ */
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct pci_dev *pdev = ntb->pdev;
+       struct perf_ctx *perf;
+       int node;
+       int rc;
+
+       node = dev_to_node(&pdev->dev);
+
+       perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+       if (!perf)
+               return -ENOMEM;
+
+       perf->ntb = ntb;
+       perf->perf_threads = 1;
+       atomic_set(&perf->tsync, 0);
+       perf->run = false;
+       perf->link_is_up = false;
+       spin_lock_init(&perf->db_lock);
+       INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+       INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+       /* Without a mapped window the test threads have nowhere to write. */
+       rc = perf_setup_mw(ntb, perf);
+       if (rc)
+               goto err_free;
+
+       rc = ntb_set_ctx(ntb, perf, &perf_ops);
+       if (rc)
+               goto err_unmap;
+
+       ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+       ntb_link_event(ntb);
+
+       rc = perf_debugfs_setup(perf);
+       if (rc)
+               goto err_link;
+
+       return 0;
+
+err_link:
+       /*
+        * Detach from the device first so no further callbacks reference
+        * the context, then flush work that may already have been queued
+        * and release the buffer the link work may have allocated.
+        */
+       ntb_clear_ctx(ntb);
+       ntb_link_disable(ntb);
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+       perf_free_mw(perf);
+err_unmap:
+       iounmap(perf->mw.vbase);
+err_free:
+       kfree(perf);
+       return rc;
+}
+
+/*
+ * NTB client remove: flush the link work items, detach the context from
+ * the NTB device, disable the link, remove the module's debugfs tree,
+ * release any DMA channels still held by the thread contexts, free the
+ * memory-window buffer and mapping, and free the context itself.
+ *
+ * NOTE(review): worker threads are not stopped here; this presumably
+ * relies on no test run being in progress at removal time - confirm.
+ */
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+       struct perf_ctx *perf = ntb->ctx;
+       int i;
+
+       dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+       cancel_delayed_work_sync(&perf->link_work);
+       cancel_work_sync(&perf->link_cleanup);
+
+       ntb_clear_ctx(ntb);
+       ntb_link_disable(ntb);
+
+       debugfs_remove_recursive(perf_debugfs_dir);
+       perf_debugfs_dir = NULL;
+
+       if (use_dma) {
+               for (i = 0; i < MAX_THREADS; i++) {
+                       struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+                       if (pctx->dma_chan)
+                               dma_release_channel(pctx->dma_chan);
+               }
+       }
+
+       /* Release the coherent buffer and translation set up on link-up. */
+       perf_free_mw(perf);
+
+       /* Undo the ioremap_wc() done by perf_setup_mw(), if it succeeded. */
+       if (perf->mw.vbase)
+               iounmap(perf->mw.vbase);
+
+       kfree(perf);
+}
+
+/*
+ * NTB client descriptor: perf_probe()/perf_remove() are the callbacks the
+ * NTB core is given for this client.  module_ntb_client() supplies the
+ * module boilerplate that registers and unregisters the client.
+ */
+static struct ntb_client perf_client = {
+       .ops = {
+               .probe = perf_probe,
+               .remove = perf_remove,
+       },
+};
+module_ntb_client(perf_client);