* Jon Mason <jon.mason@intel.com>
*/
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/random.h>
#include <linux/slab.h>
#include "ntb_hw.h"
#include "ntb_regs.h"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
+static bool xeon_errata_workaround = true;
+module_param(xeon_errata_workaround, bool, 0644);
+MODULE_PARM_DESC(xeon_errata_workaround, "Workaround for the Xeon Errata");
+
enum {
NTB_CONN_CLASSIC = 0,
NTB_CONN_B2B,
BWD_HW,
};
+static struct dentry *debugfs_dir;
+
+#define BWD_LINK_RECOVERY_TIME 500
+
/* Translate memory window 0,1 to BAR 2,4 */
-#define MW_TO_BAR(mw) (mw * 2 + 2)
+#define MW_TO_BAR(mw) (mw * NTB_MAX_NUM_MW + 2)
static DEFINE_PCI_DEVICE_TABLE(ntb_pci_tbl) = {
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF)},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_RP_JSF)},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_RP_SNB)},
{PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
- {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
+ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
{0}
};
MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
*/
void __iomem *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw)
{
- if (mw >= NTB_NUM_MW)
+ if (mw >= ntb_max_mw(ndev))
return NULL;
return ndev->mw[mw].vbase;
*/
resource_size_t ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw)
{
- if (mw >= NTB_NUM_MW)
+ if (mw >= ntb_max_mw(ndev))
return 0;
return ndev->mw[mw].bar_sz;
*/
void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
{
- if (mw >= NTB_NUM_MW)
+ if (mw >= ntb_max_mw(ndev))
return;
dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr,
(db * ndev->bits_per_vector), ndev->reg_ofs.sdb);
}
+static void bwd_recover_link(struct ntb_device *ndev)
+{
+ u32 status;
+
+ /* Driver resets the NTB ModPhy lanes - magic! */
+ writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6);
+ writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4);
+ writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4);
+ writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6);
+
+ /* Driver waits 100ms to allow the NTB ModPhy to settle */
+ msleep(100);
+
+ /* Clear AER Errors, write to clear */
+ status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET);
+ dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status);
+ status &= PCI_ERR_COR_REP_ROLL;
+ writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET);
+
+ /* Clear unexpected electrical idle event in LTSSM, write to clear */
+ status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
+ dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status);
+ status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI;
+ writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
+
+ /* Clear DeSkew Buffer error, write to clear */
+ status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET);
+ dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status);
+ status |= BWD_DESKEWSTS_DBERR;
+ writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET);
+
+ status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
+ dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status);
+ status &= BWD_IBIST_ERR_OFLOW;
+ writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
+
+ /* Releases the NTB state machine to allow the link to retrain */
+ status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
+ dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status);
+ status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT;
+ writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
+}
+
static void ntb_link_event(struct ntb_device *ndev, int link_state)
{
unsigned int event;
if (rc)
return;
}
+
+ ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
+ ndev->link_speed = (status & NTB_LINK_SPEED_MASK);
dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
- (status & NTB_LINK_WIDTH_MASK) >> 4,
- (status & NTB_LINK_SPEED_MASK));
+ ndev->link_width, ndev->link_speed);
} else {
dev_info(&ndev->pdev->dev, "Link Down\n");
ndev->link_status = NTB_LINK_DOWN;
event = NTB_EVENT_HW_LINK_DOWN;
+ /* Don't modify link width/speed, we need it in link recovery */
}
/* notify the upper layer if we have an event change */
return 0;
}
+static void bwd_link_recovery(struct work_struct *work)
+{
+ struct ntb_device *ndev = container_of(work, struct ntb_device,
+ lr_timer.work);
+ u32 status32;
+
+ bwd_recover_link(ndev);
+ /* There is a potential race between the 2 NTB devices recovering at the
+ * same time. If the times are the same, the link will not recover and
+ * the driver will be stuck in this loop forever. Add a random interval
+ * to the recovery time to prevent this race.
+ */
+ msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME);
+
+ status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
+ if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT)
+ goto retry;
+
+ status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
+ if (status32 & BWD_IBIST_ERR_OFLOW)
+ goto retry;
+
+ status32 = readl(ndev->reg_ofs.lnk_cntl);
+ if (!(status32 & BWD_CNTL_LINK_DOWN)) {
+ unsigned char speed, width;
+ u16 status16;
+
+ status16 = readw(ndev->reg_ofs.lnk_stat);
+ width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
+ speed = (status16 & NTB_LINK_SPEED_MASK);
+ if (ndev->link_width != width || ndev->link_speed != speed)
+ goto retry;
+ }
+
+ schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
+ return;
+
+retry:
+ schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT);
+}
+
/* BWD doesn't have link status interrupt, poll on that platform */
static void bwd_link_poll(struct work_struct *work)
{
if (rc)
dev_err(&ndev->pdev->dev,
"Error determining link status\n");
+
+ /* Check to see if a link error is the cause of the link down */
+ if (ndev->link_status == NTB_LINK_DOWN) {
+ u32 status32 = readl(ndev->reg_base +
+ BWD_LTSSMSTATEJMP_OFFSET);
+ if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) {
+ schedule_delayed_work(&ndev->lr_timer, 0);
+ return;
+ }
+ }
}
schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
}
if (val & SNB_PPD_DEV_TYPE)
- ndev->dev_type = NTB_DEV_DSD;
- else
ndev->dev_type = NTB_DEV_USD;
+ else
+ ndev->dev_type = NTB_DEV_DSD;
ndev->reg_ofs.pdb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
ndev->reg_ofs.pdb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET;
- if (ndev->conn_type == NTB_CONN_B2B) {
- ndev->reg_ofs.sdb = ndev->reg_base + SNB_B2B_DOORBELL_OFFSET;
- ndev->reg_ofs.spad_write = ndev->reg_base + SNB_B2B_SPAD_OFFSET;
- ndev->limits.max_spads = SNB_MAX_SPADS;
+ /* There is a Xeon hardware errata related to writes to
+ * SDOORBELL or B2BDOORBELL in conjunction with inbound access
+ * to NTB MMIO Space, which may hang the system. To workaround
+ * this use the second memory window to access the interrupt and
+ * scratch pad registers on the remote system.
+ */
+ if (xeon_errata_workaround) {
+ if (!ndev->mw[1].bar_sz)
+ return -EINVAL;
+
+ ndev->limits.max_mw = SNB_ERRATA_MAX_MW;
+ ndev->reg_ofs.spad_write = ndev->mw[1].vbase +
+ SNB_SPAD_OFFSET;
+ ndev->reg_ofs.sdb = ndev->mw[1].vbase +
+ SNB_PDOORBELL_OFFSET;
+
+ /* Set the Limit register to 4k, the minimum size, to
+ * prevent an illegal access
+ */
+ writeq(ndev->mw[1].bar_sz + 0x1000, ndev->reg_base +
+ SNB_PBAR4LMT_OFFSET);
} else {
- ndev->reg_ofs.sdb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
- ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET;
- ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS;
+ ndev->limits.max_mw = SNB_MAX_MW;
+ ndev->reg_ofs.spad_write = ndev->reg_base +
+ SNB_B2B_SPAD_OFFSET;
+ ndev->reg_ofs.sdb = ndev->reg_base +
+ SNB_B2B_DOORBELL_OFFSET;
+
+ /* Disable the Limit register, just incase it is set to
+ * something silly
+ */
+ writeq(0, ndev->reg_base + SNB_PBAR4LMT_OFFSET);
}
+ /* The Xeon errata workaround requires setting SBAR Base
+ * addresses to known values, so that the PBAR XLAT can be
+ * pointed at SBAR0 of the remote system.
+ */
+ if (ndev->dev_type == NTB_DEV_USD) {
+ writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
+ SNB_PBAR2XLAT_OFFSET);
+ if (xeon_errata_workaround)
+ writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
+ SNB_PBAR4XLAT_OFFSET);
+ else {
+ writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
+ SNB_PBAR4XLAT_OFFSET);
+ /* B2B_XLAT_OFFSET is a 64bit register, but can
+ * only take 32bit writes
+ */
+ writel(SNB_MBAR01_USD_ADDR & 0xffffffff,
+ ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
+ writel(SNB_MBAR01_DSD_ADDR >> 32,
+ ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
+ }
+
+ writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
+ SNB_SBAR0BASE_OFFSET);
+ writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
+ SNB_SBAR2BASE_OFFSET);
+ writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
+ SNB_SBAR4BASE_OFFSET);
+ } else {
+ writeq(SNB_MBAR23_USD_ADDR, ndev->reg_base +
+ SNB_PBAR2XLAT_OFFSET);
+ if (xeon_errata_workaround)
+ writeq(SNB_MBAR01_USD_ADDR, ndev->reg_base +
+ SNB_PBAR4XLAT_OFFSET);
+ else {
+ writeq(SNB_MBAR45_USD_ADDR, ndev->reg_base +
+ SNB_PBAR4XLAT_OFFSET);
+ /* B2B_XLAT_OFFSET is a 64bit register, but can
+ * only take 32bit writes
+ */
+ writel(SNB_MBAR01_USD_ADDR & 0xffffffff,
+ ndev->reg_base + SNB_B2B_XLAT_OFFSETL);
+ writel(SNB_MBAR01_USD_ADDR >> 32,
+ ndev->reg_base + SNB_B2B_XLAT_OFFSETU);
+ }
+ writeq(SNB_MBAR01_DSD_ADDR, ndev->reg_base +
+ SNB_SBAR0BASE_OFFSET);
+ writeq(SNB_MBAR23_DSD_ADDR, ndev->reg_base +
+ SNB_SBAR2BASE_OFFSET);
+ writeq(SNB_MBAR45_DSD_ADDR, ndev->reg_base +
+ SNB_SBAR4BASE_OFFSET);
+ }
+
+ ndev->limits.max_spads = SNB_MAX_B2B_SPADS;
ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
ndev->limits.msix_cnt = SNB_MSIX_CNT;
ndev->bits_per_vector = SNB_DB_BITS_PER_VEC;
ndev->limits.max_spads = BWD_MAX_COMPAT_SPADS;
}
+ ndev->limits.max_mw = BWD_MAX_MW;
ndev->limits.max_db_bits = BWD_MAX_DB_BITS;
ndev->limits.msix_cnt = BWD_MSIX_CNT;
ndev->bits_per_vector = BWD_DB_BITS_PER_VEC;
/* Since bwd doesn't have a link interrupt, setup a poll timer */
INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
+ INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery);
schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
return 0;
int rc;
switch (ndev->pdev->device) {
- case PCI_DEVICE_ID_INTEL_NTB_2ND_SNB:
- case PCI_DEVICE_ID_INTEL_NTB_RP_JSF:
- case PCI_DEVICE_ID_INTEL_NTB_RP_SNB:
- case PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF:
- case PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_JSF:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
+ case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
+ case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
+ case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
rc = ntb_xeon_setup(ndev);
break;
case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
rc = -ENODEV;
}
+ if (rc)
+ return rc;
+
+ dev_info(&ndev->pdev->dev, "Device Type = %s\n",
+ ndev->dev_type == NTB_DEV_USD ? "USD/DSP" : "DSD/USP");
+
/* Enable Bus Master and Memory Space on the secondary side */
writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, ndev->reg_ofs.spci_cmd);
- return rc;
+ return 0;
}
static void ntb_device_free(struct ntb_device *ndev)
{
- if (ndev->hw_type == BWD_HW)
+ if (ndev->hw_type == BWD_HW) {
cancel_delayed_work_sync(&ndev->hb_timer);
+ cancel_delayed_work_sync(&ndev->lr_timer);
+ }
}
static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
kfree(ndev->db_cb);
}
+static void ntb_setup_debugfs(struct ntb_device *ndev)
+{
+ if (!debugfs_initialized())
+ return;
+
+ if (!debugfs_dir)
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ ndev->debugfs_dir = debugfs_create_dir(pci_name(ndev->pdev),
+ debugfs_dir);
+}
+
+static void ntb_free_debugfs(struct ntb_device *ndev)
+{
+ debugfs_remove_recursive(ndev->debugfs_dir);
+
+ if (debugfs_dir && simple_empty(debugfs_dir)) {
+ debugfs_remove_recursive(debugfs_dir);
+ debugfs_dir = NULL;
+ }
+}
+
static int ntb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct ntb_device *ndev;
ndev->pdev = pdev;
ndev->link_status = NTB_LINK_DOWN;
pci_set_drvdata(pdev, ndev);
+ ntb_setup_debugfs(ndev);
rc = pci_enable_device(pdev);
if (rc)
goto err2;
}
- for (i = 0; i < NTB_NUM_MW; i++) {
+ for (i = 0; i < NTB_MAX_NUM_MW; i++) {
ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i));
ndev->mw[i].vbase =
ioremap_wc(pci_resource_start(pdev, MW_TO_BAR(i)),
err1:
pci_disable_device(pdev);
err:
+ ntb_free_debugfs(ndev);
kfree(ndev);
dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME);
ntb_free_callbacks(ndev);
ntb_device_free(ndev);
- for (i = 0; i < NTB_NUM_MW; i++)
+ for (i = 0; i < NTB_MAX_NUM_MW; i++)
iounmap(ndev->mw[i].vbase);
iounmap(ndev->reg_base);
pci_release_selected_regions(pdev, NTB_BAR_MASK);
pci_disable_device(pdev);
+ ntb_free_debugfs(ndev);
kfree(ndev);
}