mlxsw: Add PCI bus implementation
author Jiri Pirko <jiri@mellanox.com>
Wed, 29 Jul 2015 21:33:47 +0000 (23:33 +0200)
committer David S. Miller <davem@davemloft.net>
Thu, 30 Jul 2015 07:04:59 +0000 (00:04 -0700)
Add PCI bus implementation for Mellanox Technologies Switch ASICs. This
includes firmware initialization, async queues manipulation and command
interface implementation.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Elad Raz <eladr@mellanox.com>
Reviewed-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/pci.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/pci.h [new file with mode: 0644]

index 46268f2..1385f2c 100644 (file)
@@ -9,3 +9,13 @@ config MLXSW_CORE
 
          To compile this driver as a module, choose M here: the
          module will be called mlxsw_core.
+
+config MLXSW_PCI
+       tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
+       depends on PCI && MLXSW_CORE
+       default m
+       ---help---
+         This is PCI bus implementation for Mellanox Technologies Switch ASICs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mlxsw_pci.
index 271de28..94841c3 100644 (file)
@@ -1,2 +1,4 @@
 obj-$(CONFIG_MLXSW_CORE)       += mlxsw_core.o
 mlxsw_core-objs                        := core.o
+obj-$(CONFIG_MLXSW_PCI)                += mlxsw_pci.o
+mlxsw_pci-objs                 := pci.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
new file mode 100644 (file)
index 0000000..64f725f
--- /dev/null
@@ -0,0 +1,1791 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/pci.c
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <linux/log2.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "pci.h"
+#include "core.h"
+#include "cmd.h"
+#include "port.h"
+
+/* Name string of this PCI driver. */
+static const char mlxsw_pci_driver_name[] = "mlxsw_pci";
+
+/* PCI device ID table. It contains only the zeroed terminating entry, i.e.
+ * no device IDs are listed here.
+ */
+static const struct pci_device_id mlxsw_pci_id_table[] = {
+       {0, }
+};
+
+/* NOTE(review): presumably the driver-wide debugfs root directory; it is
+ * not assigned anywhere in the code visible here - confirm.
+ */
+static struct dentry *mlxsw_pci_dbg_root;
+
+/* Translate a PCI device ID into a device kind string.
+ *
+ * No device ID is handled yet, so every call ends up in BUG().
+ */
+static const char *mlxsw_pci_device_kind_get(const struct pci_device_id *id)
+{
+       switch (id->device) {
+       default:
+               break;
+       }
+       BUG();
+}
+
+/* 32-bit register accessors built on iowrite32be()/ioread32be().
+ * 'reg' is the register name without its MLXSW_PCI_ prefix; the prefix is
+ * pasted on and the resulting offset is added to the device's hw_addr.
+ */
+#define mlxsw_pci_write32(mlxsw_pci, reg, val) \
+       iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
+#define mlxsw_pci_read32(mlxsw_pci, reg) \
+       ioread32be((mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
+
+/* Queue types handled by this driver. The values are used as indexes into
+ * mlxsw_pci->queues[] and into the doorbell offset tables, so the order
+ * here must match those tables.
+ */
+enum mlxsw_pci_queue_type {
+       MLXSW_PCI_QUEUE_TYPE_SDQ,
+       MLXSW_PCI_QUEUE_TYPE_RDQ,
+       MLXSW_PCI_QUEUE_TYPE_CQ,
+       MLXSW_PCI_QUEUE_TYPE_EQ,
+};
+
+/* Return the short lower-case name of a queue type ("sdq", "rdq", "cq" or
+ * "eq"). Any other value triggers BUG().
+ */
+static const char *mlxsw_pci_queue_type_str(enum mlxsw_pci_queue_type q_type)
+{
+       if (q_type == MLXSW_PCI_QUEUE_TYPE_SDQ)
+               return "sdq";
+       if (q_type == MLXSW_PCI_QUEUE_TYPE_RDQ)
+               return "rdq";
+       if (q_type == MLXSW_PCI_QUEUE_TYPE_CQ)
+               return "cq";
+       if (q_type == MLXSW_PCI_QUEUE_TYPE_EQ)
+               return "eq";
+       BUG();
+}
+
+/* Number of values in enum mlxsw_pci_queue_type; sizes mlxsw_pci->queues[]. */
+#define MLXSW_PCI_QUEUE_TYPE_COUNT     4
+
+/* Doorbell offset per queue type, indexed by enum mlxsw_pci_queue_type. */
+static const u16 mlxsw_pci_doorbell_type_offset[] = {
+       MLXSW_PCI_DOORBELL_SDQ_OFFSET,  /* for type MLXSW_PCI_QUEUE_TYPE_SDQ */
+       MLXSW_PCI_DOORBELL_RDQ_OFFSET,  /* for type MLXSW_PCI_QUEUE_TYPE_RDQ */
+       MLXSW_PCI_DOORBELL_CQ_OFFSET,   /* for type MLXSW_PCI_QUEUE_TYPE_CQ */
+       MLXSW_PCI_DOORBELL_EQ_OFFSET,   /* for type MLXSW_PCI_QUEUE_TYPE_EQ */
+};
+
+/* "Arm" doorbell offset per queue type, indexed by enum mlxsw_pci_queue_type.
+ * Only CQ and EQ have an entry; the SDQ and RDQ slots are placeholders.
+ */
+static const u16 mlxsw_pci_doorbell_arm_type_offset[] = {
+       0, /* unused */
+       0, /* unused */
+       MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_CQ */
+       MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET, /* for type MLXSW_PCI_QUEUE_TYPE_EQ */
+};
+
+/* One memory chunk described by both its CPU pointer and its DMA address. */
+struct mlxsw_pci_mem_item {
+       char *buf; /* CPU pointer to the chunk */
+       dma_addr_t mapaddr; /* DMA address of the chunk */
+       size_t size; /* NOTE(review): presumably chunk size in bytes - confirm */
+};
+
+/* Software bookkeeping kept for each queue element. */
+struct mlxsw_pci_queue_elem_info {
+       char *elem; /* pointer to actual dma mapped element mem chunk */
+       /* Per queue type data; SDQ and RDQ elements each track the skb
+        * attached to the element.
+        */
+       union {
+               struct {
+                       struct sk_buff *skb;
+               } sdq;
+               struct {
+                       struct sk_buff *skb;
+               } rdq;
+       } u;
+};
+
+/* State of a single queue; the same structure is used for SDQ, RDQ, CQ and
+ * EQ. producer_counter and consumer_counter are free-running u16 counters;
+ * element indexes are derived from them by masking with (count - 1).
+ */
+struct mlxsw_pci_queue {
+       spinlock_t lock; /* for queue accesses */
+       struct mlxsw_pci_mem_item mem_item;
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       u16 producer_counter;
+       u16 consumer_counter;
+       u16 count; /* number of elements in queue */
+       u8 num; /* queue number */
+       u8 elem_size; /* size of one element */
+       enum mlxsw_pci_queue_type type;
+       struct tasklet_struct tasklet; /* queue processing tasklet */
+       struct mlxsw_pci *pci;
+       /* Statistics counters, reported through the debugfs read handlers. */
+       union {
+               struct {
+                       u32 comp_sdq_count;
+                       u32 comp_rdq_count;
+               } cq;
+               struct {
+                       u32 ev_cmd_count;
+                       u32 ev_comp_count;
+                       u32 ev_other_count;
+               } eq;
+       } u;
+};
+
+/* All queues of one type: an array of queues plus its length. */
+struct mlxsw_pci_queue_type_group {
+       struct mlxsw_pci_queue *q;
+       u8 count; /* number of queues in group */
+};
+
+/* Per-device driver state. */
+struct mlxsw_pci {
+       struct pci_dev *pdev;
+       u8 __iomem *hw_addr; /* base used by mlxsw_pci_write32/read32 */
+       /* Queue groups, indexed by enum mlxsw_pci_queue_type. */
+       struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT];
+       u32 doorbell_offset; /* passed to the DOORBELL() register macro */
+       struct msix_entry msix_entry;
+       struct mlxsw_core *core; /* handle passed to mlxsw_cmd_*() and core RX */
+       struct {
+               u16 num_pages;
+               struct mlxsw_pci_mem_item *items;
+       } fw_area;
+       struct {
+               struct mutex lock; /* Lock access to command registers */
+               bool nopoll;
+               wait_queue_head_t wait;
+               bool wait_done; /* set by the EQ command event handler */
+               /* Command completion data copied from the command EQE. */
+               struct {
+                       u8 status;
+                       u64 out_param;
+               } comp;
+       } cmd;
+       struct mlxsw_bus_info bus_info;
+       struct dentry *dbg_dir;
+};
+
+/* Schedule the processing tasklet that belongs to queue q. */
+static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
+{
+       struct tasklet_struct *queue_tasklet = &q->tasklet;
+
+       tasklet_schedule(queue_tasklet);
+}
+
+/* Return the address of element number elem_index inside the queue's memory
+ * chunk, given elements of elem_size bytes each.
+ */
+static char *__mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q,
+                                       size_t elem_size, int elem_index)
+{
+       char *queue_mem = q->mem_item.buf;
+
+       return &queue_mem[elem_size * elem_index];
+}
+
+/* Return the bookkeeping entry of element number elem_index. */
+static struct mlxsw_pci_queue_elem_info *
+mlxsw_pci_queue_elem_info_get(struct mlxsw_pci_queue *q, int elem_index)
+{
+       return q->elem_info + elem_index;
+}
+
+/* Return the bookkeeping entry of the next free producer slot, or NULL when
+ * the queue is full.
+ *
+ * producer_counter and consumer_counter are free-running u16 counters, so the
+ * fill level has to be computed modulo 2^16. Without truncating to u16 the
+ * subtraction is carried out in int (integer promotion); once
+ * producer_counter wraps past 0xffff while consumer_counter has not, the
+ * difference becomes negative, never equals q->count, and a full queue would
+ * be reported as having room.
+ */
+static struct mlxsw_pci_queue_elem_info *
+mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q)
+{
+       int index = q->producer_counter & (q->count - 1);
+       u16 in_flight = q->producer_counter - q->consumer_counter;
+
+       if (in_flight == q->count)
+               return NULL;
+       return mlxsw_pci_queue_elem_info_get(q, index);
+}
+
+/* Return the bookkeeping entry of the element the consumer counter currently
+ * points at (consumer_counter masked with count - 1).
+ */
+static struct mlxsw_pci_queue_elem_info *
+mlxsw_pci_queue_elem_info_consumer_get(struct mlxsw_pci_queue *q)
+{
+       const int index_mask = q->count - 1;
+
+       return mlxsw_pci_queue_elem_info_get(q,
+                                            q->consumer_counter & index_mask);
+}
+
+/* Return the element memory recorded for element number elem_index. */
+static char *mlxsw_pci_queue_elem_get(struct mlxsw_pci_queue *q, int elem_index)
+{
+       struct mlxsw_pci_queue_elem_info *elem_info;
+
+       elem_info = mlxsw_pci_queue_elem_info_get(q, elem_index);
+       return elem_info->elem;
+}
+
+/* Tell whether an element is still owned by hardware.
+ *
+ * The element's owner bit is compared with the consumer counter bit selected
+ * by q->count; the element is hardware owned when the two differ.
+ */
+static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit)
+{
+       bool counter_bit = (q->consumer_counter & q->count) != 0;
+
+       return owner_bit != counter_bit;
+}
+
+/* Fetch the element at the consumer position if software may process it.
+ *
+ * get_elem_owner_func extracts the owner bit from an element. If
+ * mlxsw_pci_elem_hw_owned() reports the element as hardware owned, NULL is
+ * returned and the consumer counter is left unchanged. Otherwise the consumer
+ * counter is advanced by one and the element is returned.
+ */
+static char *mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q,
+                                        u32 (*get_elem_owner_func)(char *))
+{
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       char *elem;
+       bool owner_bit;
+
+       elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+       elem = elem_info->elem;
+       owner_bit = get_elem_owner_func(elem);
+       if (mlxsw_pci_elem_hw_owned(q, owner_bit))
+               return NULL;
+       q->consumer_counter++;
+       rmb(); /* make sure we read owned bit before the rest of elem */
+       return elem;
+}
+
+/* Return the queue group that holds all queues of type q_type. */
+static struct mlxsw_pci_queue_type_group *
+mlxsw_pci_queue_type_group_get(struct mlxsw_pci *mlxsw_pci,
+                              enum mlxsw_pci_queue_type q_type)
+{
+       return mlxsw_pci->queues + q_type;
+}
+
+/* Return how many queues of type q_type the device has. */
+static u8 __mlxsw_pci_queue_count(struct mlxsw_pci *mlxsw_pci,
+                                 enum mlxsw_pci_queue_type q_type)
+{
+       return mlxsw_pci_queue_type_group_get(mlxsw_pci, q_type)->count;
+}
+
+/* Number of send descriptor queues (SDQs). */
+static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_SDQ;
+
+       return __mlxsw_pci_queue_count(mlxsw_pci, q_type);
+}
+
+/* Number of receive descriptor queues (RDQs). */
+static u8 mlxsw_pci_rdq_count(struct mlxsw_pci *mlxsw_pci)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_RDQ;
+
+       return __mlxsw_pci_queue_count(mlxsw_pci, q_type);
+}
+
+/* Number of completion queues (CQs). */
+static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_CQ;
+
+       return __mlxsw_pci_queue_count(mlxsw_pci, q_type);
+}
+
+/* Number of event queues (EQs). */
+static u8 mlxsw_pci_eq_count(struct mlxsw_pci *mlxsw_pci)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_EQ;
+
+       return __mlxsw_pci_queue_count(mlxsw_pci, q_type);
+}
+
+/* Return queue number q_num from the group of queues of type q_type. */
+static struct mlxsw_pci_queue *
+__mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci,
+                     enum mlxsw_pci_queue_type q_type, u8 q_num)
+{
+       struct mlxsw_pci_queue_type_group *queue_group;
+
+       queue_group = &mlxsw_pci->queues[q_type];
+       return queue_group->q + q_num;
+}
+
+/* Return send descriptor queue number q_num. */
+static struct mlxsw_pci_queue *mlxsw_pci_sdq_get(struct mlxsw_pci *mlxsw_pci,
+                                                u8 q_num)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_SDQ;
+
+       return __mlxsw_pci_queue_get(mlxsw_pci, q_type, q_num);
+}
+
+/* Return receive descriptor queue number q_num. */
+static struct mlxsw_pci_queue *mlxsw_pci_rdq_get(struct mlxsw_pci *mlxsw_pci,
+                                                u8 q_num)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_RDQ;
+
+       return __mlxsw_pci_queue_get(mlxsw_pci, q_type, q_num);
+}
+
+/* Return completion queue number q_num. */
+static struct mlxsw_pci_queue *mlxsw_pci_cq_get(struct mlxsw_pci *mlxsw_pci,
+                                               u8 q_num)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_CQ;
+
+       return __mlxsw_pci_queue_get(mlxsw_pci, q_type, q_num);
+}
+
+/* Return event queue number q_num. */
+static struct mlxsw_pci_queue *mlxsw_pci_eq_get(struct mlxsw_pci *mlxsw_pci,
+                                               u8 q_num)
+{
+       const enum mlxsw_pci_queue_type q_type = MLXSW_PCI_QUEUE_TYPE_EQ;
+
+       return __mlxsw_pci_queue_get(mlxsw_pci, q_type, q_num);
+}
+
+/* Write val to the doorbell register of queue q. The register is selected by
+ * the device doorbell offset, the per-type doorbell offset and the queue
+ * number.
+ */
+static void __mlxsw_pci_queue_doorbell_set(struct mlxsw_pci *mlxsw_pci,
+                                          struct mlxsw_pci_queue *q,
+                                          u16 val)
+{
+       const u16 type_offset = mlxsw_pci_doorbell_type_offset[q->type];
+
+       mlxsw_pci_write32(mlxsw_pci,
+                         DOORBELL(mlxsw_pci->doorbell_offset, type_offset,
+                                  q->num),
+                         val);
+}
+
+/* Write val to the "arm" doorbell register of queue q. Only CQ and EQ have a
+ * non-placeholder entry in the arm offset table.
+ */
+static void __mlxsw_pci_queue_doorbell_arm_set(struct mlxsw_pci *mlxsw_pci,
+                                              struct mlxsw_pci_queue *q,
+                                              u16 val)
+{
+       const u16 arm_offset = mlxsw_pci_doorbell_arm_type_offset[q->type];
+
+       mlxsw_pci_write32(mlxsw_pci,
+                         DOORBELL(mlxsw_pci->doorbell_offset, arm_offset,
+                                  q->num),
+                         val);
+}
+
+/* Publish the current producer counter of q through its doorbell. */
+static void mlxsw_pci_queue_doorbell_producer_ring(struct mlxsw_pci *mlxsw_pci,
+                                                  struct mlxsw_pci_queue *q)
+{
+       u16 doorbell_val;
+
+       wmb(); /* ensure all writes are done before we ring a bell */
+       doorbell_val = q->producer_counter;
+       __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, doorbell_val);
+}
+
+/* Publish the consumer position of q through its doorbell. The value written
+ * is the consumer counter plus the queue size, truncated to 16 bits.
+ */
+static void mlxsw_pci_queue_doorbell_consumer_ring(struct mlxsw_pci *mlxsw_pci,
+                                                  struct mlxsw_pci_queue *q)
+{
+       u16 doorbell_val;
+
+       wmb(); /* ensure all writes are done before we ring a bell */
+       doorbell_val = q->consumer_counter + q->count;
+       __mlxsw_pci_queue_doorbell_set(mlxsw_pci, q, doorbell_val);
+}
+
+/* Write the current consumer counter of q to its "arm" doorbell. */
+static void
+mlxsw_pci_queue_doorbell_arm_consumer_ring(struct mlxsw_pci *mlxsw_pci,
+                                          struct mlxsw_pci_queue *q)
+{
+       u16 doorbell_val;
+
+       wmb(); /* ensure all writes are done before we ring a bell */
+       doorbell_val = q->consumer_counter;
+       __mlxsw_pci_queue_doorbell_arm_set(mlxsw_pci, q, doorbell_val);
+}
+
+/* Return the DMA address of page number page_index of the queue memory. */
+static dma_addr_t __mlxsw_pci_queue_page_get(struct mlxsw_pci_queue *q,
+                                            int page_index)
+{
+       dma_addr_t queue_base = q->mem_item.mapaddr;
+
+       return queue_base + MLXSW_PCI_PAGE_SIZE * page_index;
+}
+
+/* Bring up one send descriptor queue (SDQ).
+ *
+ * Resets both counters, fills the SW2HW_DQ mailbox (CQ number, traffic class,
+ * queue size and the DMA address of every queue page), issues the command
+ * and, on success, rings the producer doorbell.
+ *
+ * Returns 0 on success or the error returned by mlxsw_cmd_sw2hw_sdq().
+ */
+static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                             struct mlxsw_pci_queue *q)
+{
+       int page;
+       int err;
+
+       q->producer_counter = 0;
+       q->consumer_counter = 0;
+
+       /* Set CQ of same number of this SDQ. */
+       mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num);
+       mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, 7);
+       mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
+       for (page = 0; page < MLXSW_PCI_AQ_PAGES; page++)
+               mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, page,
+                                              __mlxsw_pci_queue_page_get(q, page));
+
+       err = mlxsw_cmd_sw2hw_sdq(mlxsw_pci->core, mbox, q->num);
+       if (!err)
+               mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+       return err;
+}
+
+/* Tear down one SDQ by issuing the HW2SW_SDQ command for it. */
+static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
+                              struct mlxsw_pci_queue *q)
+{
+       struct mlxsw_core *core = mlxsw_pci->core;
+
+       mlxsw_cmd_hw2sw_sdq(core, q->num);
+}
+
+/* seq_file show handler: print one line of counters for every SDQ.
+ *
+ * file->private is the device whose drvdata is the mlxsw_pci instance.
+ * Each queue is sampled under its lock. Always returns 0.
+ */
+static int mlxsw_pci_sdq_dbg_read(struct seq_file *file, void *data)
+{
+       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
+       struct mlxsw_pci_queue *q;
+       int i;
+       static const char hdr[] =
+               "NUM PROD_COUNT CONS_COUNT COUNT\n";
+
+       /* The header is plain text, not a format string. */
+       seq_puts(file, hdr);
+       for (i = 0; i < mlxsw_pci_sdq_count(mlxsw_pci); i++) {
+               q = mlxsw_pci_sdq_get(mlxsw_pci, i);
+               spin_lock_bh(&q->lock);
+               seq_printf(file, "%3d %10d %10d %5d\n",
+                          i, q->producer_counter, q->consumer_counter,
+                          q->count);
+               spin_unlock_bh(&q->lock);
+       }
+       return 0;
+}
+
+/* DMA map one fragment and record it in a WQE.
+ *
+ * Maps frag_len bytes at frag_data in the given DMA direction and stores the
+ * resulting address and the length in scatter/gather entry 'index' of wqe.
+ *
+ * Returns 0 on success, -EIO (with a rate limited error message) when the
+ * mapping fails; the WQE is not touched in that case.
+ */
+static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
+                                 int index, char *frag_data, size_t frag_len,
+                                 int direction)
+{
+       struct pci_dev *pdev = mlxsw_pci->pdev;
+       dma_addr_t dma_addr;
+
+       dma_addr = pci_map_single(pdev, frag_data, frag_len, direction);
+       if (likely(!pci_dma_mapping_error(pdev, dma_addr))) {
+               mlxsw_pci_wqe_address_set(wqe, index, dma_addr);
+               mlxsw_pci_wqe_byte_count_set(wqe, index, frag_len);
+               return 0;
+       }
+
+       if (net_ratelimit())
+               dev_err(&pdev->dev, "failed to dma map tx frag\n");
+       return -EIO;
+}
+
+/* Undo the DMA mapping recorded in scatter/gather entry 'index' of wqe.
+ * Entries whose byte count is zero are skipped.
+ */
+static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe,
+                                    int index, int direction)
+{
+       size_t mapped_len = mlxsw_pci_wqe_byte_count_get(wqe, index);
+       dma_addr_t dma_addr = mlxsw_pci_wqe_address_get(wqe, index);
+
+       if (mapped_len)
+               pci_unmap_single(mlxsw_pci->pdev, dma_addr, mapped_len,
+                                direction);
+}
+
+/* Attach a fresh receive buffer to an RDQ element.
+ *
+ * Allocates an skb of MLXSW_PORT_MAX_MTU bytes, DMA maps its data area into
+ * scatter/gather entry 0 of the element's WQE and remembers the skb in
+ * elem_info. On any failure elem_info->u.rdq.skb is left NULL.
+ *
+ * Returns 0 on success, -ENOMEM when the skb cannot be allocated, or the
+ * error from mlxsw_pci_wqe_frag_map().
+ */
+static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
+                                  struct mlxsw_pci_queue_elem_info *elem_info)
+{
+       const size_t rx_buf_len = MLXSW_PORT_MAX_MTU;
+       struct sk_buff *new_skb;
+       int err;
+
+       elem_info->u.rdq.skb = NULL;
+       new_skb = netdev_alloc_skb_ip_align(NULL, rx_buf_len);
+       if (!new_skb)
+               return -ENOMEM;
+
+       /* Assume that wqe was previously zeroed. */
+
+       err = mlxsw_pci_wqe_frag_map(mlxsw_pci, elem_info->elem, 0,
+                                    new_skb->data, rx_buf_len,
+                                    DMA_FROM_DEVICE);
+       if (err) {
+               dev_kfree_skb_any(new_skb);
+               return err;
+       }
+
+       elem_info->u.rdq.skb = new_skb;
+       return 0;
+}
+
+/* Release the receive buffer of an RDQ element: unmap scatter/gather entry 0
+ * of its WQE and free the skb recorded in elem_info.
+ */
+static void mlxsw_pci_rdq_skb_free(struct mlxsw_pci *mlxsw_pci,
+                                  struct mlxsw_pci_queue_elem_info *elem_info)
+{
+       struct sk_buff *rx_skb = elem_info->u.rdq.skb;
+
+       mlxsw_pci_wqe_frag_unmap(mlxsw_pci, elem_info->elem, 0,
+                                DMA_FROM_DEVICE);
+       dev_kfree_skb_any(rx_skb);
+}
+
+/* Bring up one receive descriptor queue (RDQ).
+ *
+ * Resets both counters, fills the SW2HW_DQ mailbox (CQ number, queue size and
+ * the DMA address of every queue page) and issues the command. Then every
+ * element gets a receive buffer and is handed to hardware one by one by
+ * advancing the producer counter and ringing the producer doorbell.
+ *
+ * Returns 0 on success. On failure the queue is moved back to software
+ * ownership, all buffers posted so far are released and the error is
+ * returned.
+ */
+static int mlxsw_pci_rdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                             struct mlxsw_pci_queue *q)
+{
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       int i;
+       int err;
+
+       q->producer_counter = 0;
+       q->consumer_counter = 0;
+
+       /* Set CQ of same number of this RDQ with base
+        * above MLXSW_PCI_SDQS_COUNT as the lower ones are assigned to SDQs.
+        */
+       mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num + MLXSW_PCI_SDQS_COUNT);
+       mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */
+       for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) {
+               dma_addr_t mapaddr = __mlxsw_pci_queue_page_get(q, i);
+
+               mlxsw_cmd_mbox_sw2hw_dq_pa_set(mbox, i, mapaddr);
+       }
+
+       err = mlxsw_cmd_sw2hw_rdq(mlxsw_pci->core, mbox, q->num);
+       if (err)
+               return err;
+
+       mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+
+       for (i = 0; i < q->count; i++) {
+               elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
+               BUG_ON(!elem_info);
+               err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+               if (err)
+                       goto rollback;
+               /* Everything is set up, ring doorbell to pass elem to HW */
+               q->producer_counter++;
+               mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+       }
+
+       return 0;
+
+rollback:
+       /* Elements 0..i-1 were already passed to HW. Take the queue back
+        * before unmapping and freeing their buffers, the same order
+        * mlxsw_pci_rdq_fini() uses, so the device cannot DMA into memory
+        * that has been released.
+        */
+       mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
+       for (i--; i >= 0; i--) {
+               elem_info = mlxsw_pci_queue_elem_info_get(q, i);
+               mlxsw_pci_rdq_skb_free(mlxsw_pci, elem_info);
+       }
+
+       return err;
+}
+
+/* Tear down one RDQ: issue HW2SW_RDQ for it, then release the receive buffer
+ * of every element.
+ */
+static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci,
+                              struct mlxsw_pci_queue *q)
+{
+       int elem_index = 0;
+
+       mlxsw_cmd_hw2sw_rdq(mlxsw_pci->core, q->num);
+       while (elem_index < q->count) {
+               mlxsw_pci_rdq_skb_free(mlxsw_pci,
+                                      mlxsw_pci_queue_elem_info_get(q, elem_index));
+               elem_index++;
+       }
+}
+
+/* seq_file show handler: print one line of counters for every RDQ.
+ *
+ * file->private is the device whose drvdata is the mlxsw_pci instance.
+ * Each queue is sampled under its lock. Always returns 0.
+ */
+static int mlxsw_pci_rdq_dbg_read(struct seq_file *file, void *data)
+{
+       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
+       struct mlxsw_pci_queue *q;
+       int i;
+       static const char hdr[] =
+               "NUM PROD_COUNT CONS_COUNT COUNT\n";
+
+       /* The header is plain text, not a format string. */
+       seq_puts(file, hdr);
+       for (i = 0; i < mlxsw_pci_rdq_count(mlxsw_pci); i++) {
+               q = mlxsw_pci_rdq_get(mlxsw_pci, i);
+               spin_lock_bh(&q->lock);
+               seq_printf(file, "%3d %10d %10d %5d\n",
+                          i, q->producer_counter, q->consumer_counter,
+                          q->count);
+               spin_unlock_bh(&q->lock);
+       }
+       return 0;
+}
+
+/* Bring up one completion queue (CQ).
+ *
+ * Resets the consumer counter, sets the owner bit of every CQE to 1, fills
+ * the SW2HW_CQ mailbox (CQE version, completion EQ number, queue size and
+ * the DMA address of every queue page), issues the command and, on success,
+ * rings both the consumer and the arm doorbell.
+ *
+ * Returns 0 on success or the error returned by mlxsw_cmd_sw2hw_cq().
+ */
+static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                            struct mlxsw_pci_queue *q)
+{
+       int elem_index;
+       int page;
+       int err;
+
+       q->consumer_counter = 0;
+
+       for (elem_index = 0; elem_index < q->count; elem_index++)
+               mlxsw_pci_cqe_owner_set(mlxsw_pci_queue_elem_get(q, elem_index),
+                                       1);
+
+       mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */
+       mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM);
+       mlxsw_cmd_mbox_sw2hw_cq_oi_set(mbox, 0);
+       mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0);
+       mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count));
+       for (page = 0; page < MLXSW_PCI_AQ_PAGES; page++)
+               mlxsw_cmd_mbox_sw2hw_cq_pa_set(mbox, page,
+                                              __mlxsw_pci_queue_page_get(q, page));
+
+       err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
+       if (!err) {
+               mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+               mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+       }
+       return err;
+}
+
+/* Tear down one CQ by issuing the HW2SW_CQ command for it. */
+static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
+                             struct mlxsw_pci_queue *q)
+{
+       struct mlxsw_core *core = mlxsw_pci->core;
+
+       mlxsw_cmd_hw2sw_cq(core, q->num);
+}
+
+/* seq_file show handler: print one line of counters for every CQ.
+ *
+ * file->private is the device whose drvdata is the mlxsw_pci instance.
+ * Each queue is sampled under its lock. Always returns 0.
+ */
+static int mlxsw_pci_cq_dbg_read(struct seq_file *file, void *data)
+{
+       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
+
+       struct mlxsw_pci_queue *q;
+       int i;
+       static const char hdr[] =
+               "NUM CONS_INDEX  SDQ_COUNT  RDQ_COUNT COUNT\n";
+
+       /* The header is plain text, not a format string. */
+       seq_puts(file, hdr);
+       for (i = 0; i < mlxsw_pci_cq_count(mlxsw_pci); i++) {
+               q = mlxsw_pci_cq_get(mlxsw_pci, i);
+               spin_lock_bh(&q->lock);
+               /* The completion counters are u32, hence %u. */
+               seq_printf(file, "%3d %10d %10u %10u %5d\n",
+                          i, q->consumer_counter, q->u.cq.comp_sdq_count,
+                          q->u.cq.comp_rdq_count, q->count);
+               spin_unlock_bh(&q->lock);
+       }
+       return 0;
+}
+
+/* Handle one send completion for SDQ q.
+ *
+ * Under the queue lock: unmap all scatter/gather entries of the WQE at the
+ * consumer position, free its skb and advance the consumer counter. A debug
+ * message is emitted when the counter value before the increment differs
+ * from consumer_counter_limit. The cqe argument is not used.
+ */
+static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci,
+                                    struct mlxsw_pci_queue *q,
+                                    u16 consumer_counter_limit,
+                                    char *cqe)
+{
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       u16 completed_counter;
+       int frag;
+
+       spin_lock(&q->lock);
+       elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+       for (frag = 0; frag < MLXSW_PCI_WQE_SG_ENTRIES; frag++)
+               mlxsw_pci_wqe_frag_unmap(mlxsw_pci, elem_info->elem, frag,
+                                        DMA_TO_DEVICE);
+       dev_kfree_skb_any(elem_info->u.sdq.skb);
+       elem_info->u.sdq.skb = NULL;
+
+       completed_counter = q->consumer_counter++;
+       if (completed_counter != consumer_counter_limit)
+               dev_dbg_ratelimited(&mlxsw_pci->pdev->dev, "Consumer counter does not match limit in SDQ\n");
+       spin_unlock(&q->lock);
+}
+
+/* Handle one receive completion for RDQ q.
+ *
+ * The skb of the element at the consumer position is unmapped and either
+ * passed to the core (with system port and trap ID taken from the CQE) or
+ * dropped when the CQE is flagged as LAG. The element is then zeroed, given
+ * a fresh receive buffer and handed back to hardware.
+ *
+ * A debug message is emitted when the consumer counter value before the
+ * increment differs from consumer_counter_limit.
+ *
+ * NOTE(review): if the element has no skb the function returns without
+ * advancing the consumer counter, and an element whose refill failed is
+ * still posted to hardware with a zeroed WQE - confirm this is intended.
+ */
+static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
+                                    struct mlxsw_pci_queue *q,
+                                    u16 consumer_counter_limit,
+                                    char *cqe)
+{
+       struct pci_dev *pdev = mlxsw_pci->pdev;
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       char *wqe;
+       struct sk_buff *skb;
+       struct mlxsw_rx_info rx_info;
+       int err;
+
+       elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
+       /* This is an RDQ element, so use the rdq member of the union. */
+       skb = elem_info->u.rdq.skb;
+       if (!skb)
+               return;
+       wqe = elem_info->elem;
+       mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+
+       if (q->consumer_counter++ != consumer_counter_limit)
+               dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
+
+       if (mlxsw_pci_cqe_lag_get(cqe)) {
+               /* We do not support lag now */
+               dev_kfree_skb_any(skb);
+       } else {
+               rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe);
+               rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe);
+
+               skb_put(skb, mlxsw_pci_cqe_byte_count_get(cqe));
+               mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
+       }
+
+       /* Refill the element; the WQE must be zeroed before a new buffer is
+        * mapped into it.
+        */
+       memset(wqe, 0, q->elem_size);
+       err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+       if (err && net_ratelimit())
+               dev_dbg(&pdev->dev, "Failed to alloc skb for RDQ\n");
+       /* Everything is set up, ring doorbell to pass elem to HW */
+       q->producer_counter++;
+       mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+}
+
+/* Return the next CQE software may process, or NULL if there is none. */
+static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q)
+{
+       char *cqe = mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_cqe_owner_get);
+
+       return cqe;
+}
+
+/* CQ tasklet: process completions of one completion queue.
+ *
+ * 'data' is the struct mlxsw_pci_queue pointer of the CQ. Each software
+ * owned CQE is dispatched to the send or receive completion handler of the
+ * descriptor queue named in the CQE. Processing stops when no CQE is left or
+ * when the number of handled CQEs reaches half the queue size. If anything
+ * was handled, the consumer and arm doorbells are rung.
+ */
+static void mlxsw_pci_cq_tasklet(unsigned long data)
+{
+       struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data;
+       struct mlxsw_pci *mlxsw_pci = q->pci;
+       const int credits = q->count >> 1;
+       int items = 0;
+       char *cqe;
+
+       for (;;) {
+               u16 wqe_counter;
+               u8 sendq;
+               u8 dqn;
+
+               cqe = mlxsw_pci_cq_sw_cqe_get(q);
+               if (!cqe)
+                       break;
+
+               wqe_counter = mlxsw_pci_cqe_wqe_counter_get(cqe);
+               sendq = mlxsw_pci_cqe_sr_get(cqe);
+               dqn = mlxsw_pci_cqe_dqn_get(cqe);
+
+               if (sendq) {
+                       mlxsw_pci_cqe_sdq_handle(mlxsw_pci,
+                                                mlxsw_pci_sdq_get(mlxsw_pci, dqn),
+                                                wqe_counter, cqe);
+                       q->u.cq.comp_sdq_count++;
+               } else {
+                       mlxsw_pci_cqe_rdq_handle(mlxsw_pci,
+                                                mlxsw_pci_rdq_get(mlxsw_pci, dqn),
+                                                wqe_counter, cqe);
+                       q->u.cq.comp_rdq_count++;
+               }
+
+               items++;
+               if (items == credits)
+                       break;
+       }
+
+       if (!items)
+               return;
+       mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+       mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+}
+
+/* Bring up one event queue (EQ).
+ *
+ * Resets the consumer counter, sets the owner bit of every EQE to 1, fills
+ * the SW2HW_EQ mailbox (MSI-X use, armed state, queue size and the DMA
+ * address of every queue page), issues the command and, on success, rings
+ * both the consumer and the arm doorbell.
+ *
+ * Returns 0 on success or the error returned by mlxsw_cmd_sw2hw_eq().
+ */
+static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                            struct mlxsw_pci_queue *q)
+{
+       int elem_index;
+       int page;
+       int err;
+
+       q->consumer_counter = 0;
+
+       for (elem_index = 0; elem_index < q->count; elem_index++)
+               mlxsw_pci_eqe_owner_set(mlxsw_pci_queue_elem_get(q, elem_index),
+                                       1);
+
+       mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */
+       mlxsw_cmd_mbox_sw2hw_eq_oi_set(mbox, 0);
+       mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */
+       mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count));
+       for (page = 0; page < MLXSW_PCI_AQ_PAGES; page++)
+               mlxsw_cmd_mbox_sw2hw_eq_pa_set(mbox, page,
+                                              __mlxsw_pci_queue_page_get(q, page));
+
+       err = mlxsw_cmd_sw2hw_eq(mlxsw_pci->core, mbox, q->num);
+       if (!err) {
+               mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+               mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+       }
+       return err;
+}
+
+/* Tear down one EQ by issuing the HW2SW_EQ command for it. */
+static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci,
+                             struct mlxsw_pci_queue *q)
+{
+       struct mlxsw_core *core = mlxsw_pci->core;
+
+       mlxsw_cmd_hw2sw_eq(core, q->num);
+}
+
+/* seq_file show handler: print one line of counters for every EQ.
+ *
+ * file->private is the device whose drvdata is the mlxsw_pci instance.
+ * Each queue is sampled under its lock. Always returns 0.
+ */
+static int mlxsw_pci_eq_dbg_read(struct seq_file *file, void *data)
+{
+       struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private);
+       struct mlxsw_pci_queue *q;
+       int i;
+       static const char hdr[] =
+               "NUM CONS_COUNT     EV_CMD    EV_COMP   EV_OTHER COUNT\n";
+
+       /* The header is plain text, not a format string. */
+       seq_puts(file, hdr);
+       for (i = 0; i < mlxsw_pci_eq_count(mlxsw_pci); i++) {
+               q = mlxsw_pci_eq_get(mlxsw_pci, i);
+               spin_lock_bh(&q->lock);
+               /* The event counters are u32, hence %u. */
+               seq_printf(file, "%3d %10d %10u %10u %10u %5d\n",
+                          i, q->consumer_counter, q->u.eq.ev_cmd_count,
+                          q->u.eq.ev_comp_count, q->u.eq.ev_other_count,
+                          q->count);
+               spin_unlock_bh(&q->lock);
+       }
+       return 0;
+}
+
+/* Handle a command completion event: copy the status and the 64-bit output
+ * parameter (high and low halves) from the EQE into mlxsw_pci->cmd.comp,
+ * mark the wait as done and wake up the waiter.
+ */
+static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe)
+{
+       u64 out_param_high;
+
+       mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe);
+
+       out_param_high = (u64) mlxsw_pci_eqe_cmd_out_param_h_get(eqe);
+       mlxsw_pci->cmd.comp.out_param =
+               out_param_high << 32 | mlxsw_pci_eqe_cmd_out_param_l_get(eqe);
+
+       mlxsw_pci->cmd.wait_done = true;
+       wake_up(&mlxsw_pci->cmd.wait);
+}
+
+/* Return the next EQE software may process, or NULL if there is none. */
+static char *mlxsw_pci_eq_sw_eqe_get(struct mlxsw_pci_queue *q)
+{
+       char *eqe = mlxsw_pci_queue_sw_elem_get(q, mlxsw_pci_eqe_owner_get);
+
+       return eqe;
+}
+
+/* EQ tasklet: process events of one event queue.
+ *
+ * 'data' is the struct mlxsw_pci_queue pointer of the EQ. Command events are
+ * passed to mlxsw_pci_eq_cmd_event(); completion events only mark their CQ
+ * number in a local bitmap; anything else is just counted. Processing stops
+ * when no EQE is left or when the number of handled EQEs reaches half the
+ * queue size. If anything was handled, the consumer and arm doorbells are
+ * rung. Finally the tasklet of every CQ marked in the bitmap is scheduled.
+ */
+static void mlxsw_pci_eq_tasklet(unsigned long data)
+{
+       struct mlxsw_pci_queue *q = (struct mlxsw_pci_queue *) data;
+       struct mlxsw_pci *mlxsw_pci = q->pci;
+       unsigned long active_cqns[BITS_TO_LONGS(MLXSW_PCI_CQS_COUNT)];
+       const int credits = q->count >> 1;
+       bool cq_handle = false;
+       int items = 0;
+       char *eqe;
+       u8 cqn;
+
+       memset(&active_cqns, 0, sizeof(active_cqns));
+
+       for (;;) {
+               u8 event_type;
+
+               eqe = mlxsw_pci_eq_sw_eqe_get(q);
+               if (!eqe)
+                       break;
+
+               event_type = mlxsw_pci_eqe_event_type_get(eqe);
+               if (event_type == MLXSW_PCI_EQE_EVENT_TYPE_CMD) {
+                       mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe);
+                       q->u.eq.ev_cmd_count++;
+               } else if (event_type == MLXSW_PCI_EQE_EVENT_TYPE_COMP) {
+                       cqn = mlxsw_pci_eqe_cqn_get(eqe);
+                       set_bit(cqn, active_cqns);
+                       cq_handle = true;
+                       q->u.eq.ev_comp_count++;
+               } else {
+                       q->u.eq.ev_other_count++;
+               }
+
+               items++;
+               if (items == credits)
+                       break;
+       }
+
+       if (items) {
+               mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
+               mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
+       }
+
+       if (!cq_handle)
+               return;
+       for_each_set_bit(cqn, active_cqns, MLXSW_PCI_CQS_COUNT) {
+               q = mlxsw_pci_cq_get(mlxsw_pci, cqn);
+               mlxsw_pci_queue_tasklet_schedule(q);
+       }
+}
+
+/* Per-queue-type description used by the generic queue code:
+ * mlxsw_pci_queue_init() copies type and geometry into the queue and
+ * calls ->init; mlxsw_pci_queue_fini() calls ->fini.
+ */
+struct mlxsw_pci_queue_ops {
+       /* Not set by any of the ops tables defined right below. */
+       const char *name;
+       enum mlxsw_pci_queue_type type;
+       /* Type-specific setup; invoked with a zeroed command mailbox. */
+       int (*init)(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                   struct mlxsw_pci_queue *q);
+       /* Type-specific teardown; invoked before queue memory is freed. */
+       void (*fini)(struct mlxsw_pci *mlxsw_pci,
+                    struct mlxsw_pci_queue *q);
+       /* Optional; when non-NULL it becomes the queue's tasklet handler. */
+       void (*tasklet)(unsigned long data);
+       /* seq_file callback registered in debugfs for the queue group. */
+       int (*dbg_read)(struct seq_file *s, void *data);
+       /* Number of elements in a queue of this type. */
+       u16 elem_count;
+       /* Size of one element. NOTE(review): presumably bytes - confirm. */
+       u8 elem_size;
+};
+
+/* Send descriptor queue operations; no tasklet is registered for SDQs. */
+static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = {
+       .elem_size      = MLXSW_PCI_WQE_SIZE,
+       .elem_count     = MLXSW_PCI_WQE_COUNT,
+       .dbg_read       = mlxsw_pci_sdq_dbg_read,
+       .fini           = mlxsw_pci_sdq_fini,
+       .init           = mlxsw_pci_sdq_init,
+       .type           = MLXSW_PCI_QUEUE_TYPE_SDQ,
+};
+
+/* Receive descriptor queue operations; no tasklet is registered for RDQs. */
+static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = {
+       .elem_size      = MLXSW_PCI_WQE_SIZE,
+       .elem_count     = MLXSW_PCI_WQE_COUNT,
+       .dbg_read       = mlxsw_pci_rdq_dbg_read,
+       .fini           = mlxsw_pci_rdq_fini,
+       .init           = mlxsw_pci_rdq_init,
+       .type           = MLXSW_PCI_QUEUE_TYPE_RDQ,
+};
+
+/* Completion queue operations; CQs are serviced by mlxsw_pci_cq_tasklet(). */
+static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = {
+       .elem_size      = MLXSW_PCI_CQE_SIZE,
+       .elem_count     = MLXSW_PCI_CQE_COUNT,
+       .dbg_read       = mlxsw_pci_cq_dbg_read,
+       .tasklet        = mlxsw_pci_cq_tasklet,
+       .fini           = mlxsw_pci_cq_fini,
+       .init           = mlxsw_pci_cq_init,
+       .type           = MLXSW_PCI_QUEUE_TYPE_CQ,
+};
+
+/* Event queue operations; EQs are serviced by mlxsw_pci_eq_tasklet(). */
+static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = {
+       .elem_size      = MLXSW_PCI_EQE_SIZE,
+       .elem_count     = MLXSW_PCI_EQE_COUNT,
+       .dbg_read       = mlxsw_pci_eq_dbg_read,
+       .tasklet        = mlxsw_pci_eq_tasklet,
+       .fini           = mlxsw_pci_eq_fini,
+       .init           = mlxsw_pci_eq_init,
+       .type           = MLXSW_PCI_QUEUE_TYPE_EQ,
+};
+
+/* Set up one queue: fill in the generic fields from q_ops, install the
+ * tasklet if the type has one, allocate and zero the DMA-coherent queue
+ * memory, build the per-element info array and finally run the
+ * type-specific ->init with a zeroed mailbox.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * returned by ->init. On failure everything allocated here is released.
+ */
+static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                               const struct mlxsw_pci_queue_ops *q_ops,
+                               struct mlxsw_pci_queue *q, u8 q_num)
+{
+       struct mlxsw_pci_mem_item *ring = &q->mem_item;
+       int idx;
+       int err;
+
+       spin_lock_init(&q->lock);
+       q->pci = mlxsw_pci;
+       q->type = q_ops->type;
+       q->num = q_num;
+       q->elem_size = q_ops->elem_size;
+       q->count = q_ops->elem_count;
+
+       if (q_ops->tasklet)
+               tasklet_init(&q->tasklet, q_ops->tasklet, (unsigned long) q);
+
+       ring->size = MLXSW_PCI_AQ_SIZE;
+       ring->buf = pci_alloc_consistent(mlxsw_pci->pdev, ring->size,
+                                        &ring->mapaddr);
+       if (!ring->buf)
+               return -ENOMEM;
+       memset(ring->buf, 0, ring->size);
+
+       q->elem_info = kcalloc(q->count, sizeof(*q->elem_info), GFP_KERNEL);
+       if (!q->elem_info) {
+               err = -ENOMEM;
+               goto free_ring;
+       }
+
+       /* Cache each element's address in its info entry so later code
+        * does not have to recompute it.
+        */
+       for (idx = 0; idx < q->count; idx++) {
+               struct mlxsw_pci_queue_elem_info *info;
+
+               info = mlxsw_pci_queue_elem_info_get(q, idx);
+               info->elem = __mlxsw_pci_queue_elem_get(q, q_ops->elem_size,
+                                                       idx);
+       }
+
+       mlxsw_cmd_mbox_zero(mbox);
+       err = q_ops->init(mlxsw_pci, mbox, q);
+       if (!err)
+               return 0;
+
+       kfree(q->elem_info);
+free_ring:
+       pci_free_consistent(mlxsw_pci->pdev, ring->size,
+                           ring->buf, ring->mapaddr);
+       return err;
+}
+
+/* Tear down one queue: run the type-specific ->fini first, then free the
+ * element info array and the DMA-coherent queue memory.
+ */
+static void mlxsw_pci_queue_fini(struct mlxsw_pci *mlxsw_pci,
+                                const struct mlxsw_pci_queue_ops *q_ops,
+                                struct mlxsw_pci_queue *q)
+{
+       struct mlxsw_pci_mem_item *ring = &q->mem_item;
+
+       q_ops->fini(mlxsw_pci, q);
+
+       kfree(q->elem_info);
+       pci_free_consistent(mlxsw_pci->pdev,
+                           ring->size, ring->buf, ring->mapaddr);
+}
+
+/* Allocate and initialize the whole group of queues of one type.
+ *
+ * @mlxsw_pci: device instance.
+ * @mbox: command mailbox, reused for every queue's ->init.
+ * @q_ops: description of the queue type.
+ * @num_qs: number of queues to create.
+ *
+ * On success the group's count is set and a "<type>_stats" debugfs
+ * seq_file is registered for it. On failure the queues that were already
+ * initialized are torn down again in reverse order and the queue array
+ * is freed.
+ *
+ * Returns 0 on success, -ENOMEM if the queue array cannot be allocated,
+ * or the error from mlxsw_pci_queue_init().
+ */
+static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                                     const struct mlxsw_pci_queue_ops *q_ops,
+                                     u8 num_qs)
+{
+       struct pci_dev *pdev = mlxsw_pci->pdev;
+       struct mlxsw_pci_queue_type_group *queue_group;
+       char tmp[16];
+       int i;
+       int err;
+
+       queue_group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type);
+       queue_group->q = kcalloc(num_qs, sizeof(*queue_group->q), GFP_KERNEL);
+       if (!queue_group->q)
+               return -ENOMEM;
+
+       for (i = 0; i < num_qs; i++) {
+               err = mlxsw_pci_queue_init(mlxsw_pci, mbox, q_ops,
+                                          &queue_group->q[i], i);
+               if (err)
+                       goto err_queue_init;
+       }
+       queue_group->count = num_qs;
+
+       /* Bounded formatting: the name can never overrun the stack buffer,
+        * whatever string the type helper returns.
+        */
+       snprintf(tmp, sizeof(tmp), "%s_stats",
+                mlxsw_pci_queue_type_str(q_ops->type));
+       debugfs_create_devm_seqfile(&pdev->dev, tmp, mlxsw_pci->dbg_dir,
+                                   q_ops->dbg_read);
+
+       return 0;
+
+err_queue_init:
+       /* Queue i failed and cleaned up after itself; undo 0..i-1. */
+       for (i--; i >= 0; i--)
+               mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &queue_group->q[i]);
+       kfree(queue_group->q);
+       return err;
+}
+
+/* Tear down every queue of the group that belongs to q_ops->type and free
+ * the group's queue array.
+ */
+static void mlxsw_pci_queue_group_fini(struct mlxsw_pci *mlxsw_pci,
+                                      const struct mlxsw_pci_queue_ops *q_ops)
+{
+       struct mlxsw_pci_queue_type_group *group;
+       int idx = 0;
+
+       group = mlxsw_pci_queue_type_group_get(mlxsw_pci, q_ops->type);
+       while (idx < group->count) {
+               mlxsw_pci_queue_fini(mlxsw_pci, q_ops, &group->q[idx]);
+               idx++;
+       }
+       kfree(group->q);
+}
+
+/* Query the device's async queue capabilities, verify that they match the
+ * compile-time queue counts and sizes this driver is built for, and then
+ * initialize the EQ, CQ, SDQ and RDQ groups in that order.
+ *
+ * Returns 0 on success, the error from the capability query, -EINVAL when
+ * the reported capabilities differ from what the driver expects, or the
+ * error from a queue group initialization (earlier groups are torn down
+ * again in that case).
+ */
+static int mlxsw_pci_aqs_init(struct mlxsw_pci *mlxsw_pci, char *mbox)
+{
+       struct pci_dev *pdev = mlxsw_pci->pdev;
+       u8 num_sdqs;
+       u8 sdq_log2sz;
+       u8 num_rdqs;
+       u8 rdq_log2sz;
+       u8 num_cqs;
+       u8 cq_log2sz;
+       u8 num_eqs;
+       u8 eq_log2sz;
+       int err;
+
+       mlxsw_cmd_mbox_zero(mbox);
+       err = mlxsw_cmd_query_aq_cap(mlxsw_pci->core, mbox);
+       if (err)
+               return err;
+
+       num_sdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_sdqs_get(mbox);
+       sdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_sdq_sz_get(mbox);
+       num_rdqs = mlxsw_cmd_mbox_query_aq_cap_max_num_rdqs_get(mbox);
+       rdq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_rdq_sz_get(mbox);
+       num_cqs = mlxsw_cmd_mbox_query_aq_cap_max_num_cqs_get(mbox);
+       cq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_cq_sz_get(mbox);
+       num_eqs = mlxsw_cmd_mbox_query_aq_cap_max_num_eqs_get(mbox);
+       eq_log2sz = mlxsw_cmd_mbox_query_aq_cap_log_max_eq_sz_get(mbox);
+
+       /* The driver only works with exactly the queue counts it was
+        * compiled for; anything else is rejected.
+        */
+       if ((num_sdqs != MLXSW_PCI_SDQS_COUNT) ||
+           (num_rdqs != MLXSW_PCI_RDQS_COUNT) ||
+           (num_cqs != MLXSW_PCI_CQS_COUNT) ||
+           (num_eqs != MLXSW_PCI_EQS_COUNT)) {
+               dev_err(&pdev->dev, "Unsupported number of queues\n");
+               return -EINVAL;
+       }
+
+       /* Sizes are reported as log2; they must equal the fixed element
+        * counts used by the ops tables.
+        */
+       if ((1 << sdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
+           (1 << rdq_log2sz != MLXSW_PCI_WQE_COUNT) ||
+           (1 << cq_log2sz != MLXSW_PCI_CQE_COUNT) ||
+           (1 << eq_log2sz != MLXSW_PCI_EQE_COUNT)) {
+               dev_err(&pdev->dev, "Unsupported number of async queue descriptors\n");
+               return -EINVAL;
+       }
+
+       err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_eq_ops,
+                                        num_eqs);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to initialize event queues\n");
+               return err;
+       }
+
+       err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_cq_ops,
+                                        num_cqs);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to initialize completion queues\n");
+               goto err_cqs_init;
+       }
+
+       err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_sdq_ops,
+                                        num_sdqs);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to initialize send descriptor queues\n");
+               goto err_sdqs_init;
+       }
+
+       err = mlxsw_pci_queue_group_init(mlxsw_pci, mbox, &mlxsw_pci_rdq_ops,
+                                        num_rdqs);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to initialize receive descriptor queues\n");
+               goto err_rdqs_init;
+       }
+
+       /* Up to this point the command interface had to be polled for
+        * completion. With all queues initialized, mlxsw_pci_cmd_exec()
+        * may request a completion event instead (nopoll selects that).
+        */
+       mlxsw_pci->cmd.nopoll = true;
+       return 0;
+
+err_rdqs_init:
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
+err_sdqs_init:
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
+err_cqs_init:
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops);
+       return err;
+}
+
+/* Undo mlxsw_pci_aqs_init(): switch the command interface back to
+ * polling first, then tear the queue groups down in the reverse of the
+ * order they were initialized in (RDQ, SDQ, CQ, EQ).
+ */
+static void mlxsw_pci_aqs_fini(struct mlxsw_pci *mlxsw_pci)
+{
+       /* Commands issued from here on must not wait for EQ events. */
+       mlxsw_pci->cmd.nopoll = false;
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_rdq_ops);
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_sdq_ops);
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_cq_ops);
+       mlxsw_pci_queue_group_fini(mlxsw_pci, &mlxsw_pci_eq_ops);
+}
+
+/* Write one switch-ID entry of the config profile into the mailbox.
+ * Only the fields flagged as used in @swid are written; the mask written
+ * last tells which of them were set (bit 0: type, bit 1: properties).
+ */
+static void
+mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
+                                    char *mbox, int index,
+                                    const struct mlxsw_swid_config *swid)
+{
+       u8 mask;
+
+       if (swid->used_type)
+               mlxsw_cmd_mbox_config_profile_swid_config_type_set(
+                       mbox, index, swid->type);
+       if (swid->used_properties)
+               mlxsw_cmd_mbox_config_profile_swid_config_properties_set(
+                       mbox, index, swid->properties);
+
+       mask = (swid->used_type ? 1 : 0) | (swid->used_properties ? 2 : 0);
+       mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask);
+}
+
+/* Translate @profile into a CONFIG_PROFILE command mailbox and send it.
+ *
+ * The mailbox is zeroed first. For every profile field whose used_* flag
+ * is set, the matching "set_<field>" indicator is written as 1 together
+ * with the field value; fields that are not flagged are left zero. The
+ * per-swid entries are written afterwards.
+ *
+ * Returns the result of mlxsw_cmd_config_profile_set().
+ */
+static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                                   const struct mlxsw_config_profile *profile)
+{
+       int i;
+
+       mlxsw_cmd_mbox_zero(mbox);
+
+       if (profile->used_max_vepa_channels) {
+               mlxsw_cmd_mbox_config_profile_set_max_vepa_channels_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_vepa_channels_set(
+                       mbox, profile->max_vepa_channels);
+       }
+       if (profile->used_max_lag) {
+               mlxsw_cmd_mbox_config_profile_set_max_lag_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_lag_set(
+                       mbox, profile->max_lag);
+       }
+       if (profile->used_max_port_per_lag) {
+               mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_port_per_lag_set(
+                       mbox, profile->max_port_per_lag);
+       }
+       if (profile->used_max_mid) {
+               mlxsw_cmd_mbox_config_profile_set_max_mid_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_mid_set(
+                       mbox, profile->max_mid);
+       }
+       if (profile->used_max_pgt) {
+               mlxsw_cmd_mbox_config_profile_set_max_pgt_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_pgt_set(
+                       mbox, profile->max_pgt);
+       }
+       if (profile->used_max_system_port) {
+               mlxsw_cmd_mbox_config_profile_set_max_system_port_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_system_port_set(
+                       mbox, profile->max_system_port);
+       }
+       if (profile->used_max_vlan_groups) {
+               mlxsw_cmd_mbox_config_profile_set_max_vlan_groups_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_vlan_groups_set(
+                       mbox, profile->max_vlan_groups);
+       }
+       if (profile->used_max_regions) {
+               mlxsw_cmd_mbox_config_profile_set_max_regions_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_regions_set(
+                       mbox, profile->max_regions);
+       }
+       /* A single used flag covers both flood table limits. */
+       if (profile->used_flood_tables) {
+               mlxsw_cmd_mbox_config_profile_set_flood_tables_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_flood_tables_set(
+                       mbox, profile->max_flood_tables);
+               mlxsw_cmd_mbox_config_profile_max_vid_flood_tables_set(
+                       mbox, profile->max_vid_flood_tables);
+       }
+       if (profile->used_flood_mode) {
+               mlxsw_cmd_mbox_config_profile_set_flood_mode_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_flood_mode_set(
+                       mbox, profile->flood_mode);
+       }
+       if (profile->used_max_ib_mc) {
+               mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_ib_mc_set(
+                       mbox, profile->max_ib_mc);
+       }
+       if (profile->used_max_pkey) {
+               mlxsw_cmd_mbox_config_profile_set_max_pkey_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_max_pkey_set(
+                       mbox, profile->max_pkey);
+       }
+       if (profile->used_ar_sec) {
+               mlxsw_cmd_mbox_config_profile_set_ar_sec_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_ar_sec_set(
+                       mbox, profile->ar_sec);
+       }
+       if (profile->used_adaptive_routing_group_cap) {
+               mlxsw_cmd_mbox_config_profile_set_adaptive_routing_group_cap_set(
+                       mbox, 1);
+               mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
+                       mbox, profile->adaptive_routing_group_cap);
+       }
+
+       /* Every swid slot is written, including its (possibly zero) mask. */
+       for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
+               mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i,
+                                                    &profile->swid_config[i]);
+
+       return mlxsw_cmd_config_profile_set(mlxsw_pci->core, mbox);
+}
+
+/* Run the BOARDINFO command with a zeroed mailbox and, when it succeeds,
+ * copy the VSD and PSID fields of the reply into the bus info.
+ *
+ * Returns 0 on success or the error from mlxsw_cmd_boardinfo(), in which
+ * case the bus info is left untouched.
+ */
+static int mlxsw_pci_boardinfo(struct mlxsw_pci *mlxsw_pci, char *mbox)
+{
+       struct mlxsw_bus_info *info = &mlxsw_pci->bus_info;
+       int err;
+
+       mlxsw_cmd_mbox_zero(mbox);
+       err = mlxsw_cmd_boardinfo(mlxsw_pci->core, mbox);
+       if (!err) {
+               mlxsw_cmd_mbox_boardinfo_vsd_memcpy_from(mbox, info->vsd);
+               mlxsw_cmd_mbox_boardinfo_psid_memcpy_from(mbox, info->psid);
+       }
+       return err;
+}
+
+/* Allocate @num_pages DMA-coherent pages for the firmware, describe each
+ * of them in the mailbox (address plus log2 size 0, i.e. one page) and
+ * hand them to the device with the MAP_FA command.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure or the error from
+ * mlxsw_cmd_map_fa(). On failure every page allocated so far and the
+ * item array are freed.
+ */
+static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
+                                 u16 num_pages)
+{
+       struct mlxsw_pci_mem_item *page;
+       int allocated = 0;
+       int err;
+
+       mlxsw_pci->fw_area.items = kcalloc(num_pages, sizeof(*page),
+                                          GFP_KERNEL);
+       if (!mlxsw_pci->fw_area.items)
+               return -ENOMEM;
+       mlxsw_pci->fw_area.num_pages = num_pages;
+
+       mlxsw_cmd_mbox_zero(mbox);
+       while (allocated < num_pages) {
+               page = &mlxsw_pci->fw_area.items[allocated];
+
+               page->size = MLXSW_PCI_PAGE_SIZE;
+               page->buf = pci_alloc_consistent(mlxsw_pci->pdev,
+                                                page->size,
+                                                &page->mapaddr);
+               if (!page->buf) {
+                       err = -ENOMEM;
+                       goto free_pages;
+               }
+               mlxsw_cmd_mbox_map_fa_pa_set(mbox, allocated, page->mapaddr);
+               /* log2 size 0 means a single page */
+               mlxsw_cmd_mbox_map_fa_log2size_set(mbox, allocated, 0);
+               allocated++;
+       }
+
+       err = mlxsw_cmd_map_fa(mlxsw_pci->core, mbox, num_pages);
+       if (!err)
+               return 0;
+
+free_pages:
+       /* Release the pages in reverse order of allocation. */
+       while (allocated-- > 0) {
+               page = &mlxsw_pci->fw_area.items[allocated];
+
+               pci_free_consistent(mlxsw_pci->pdev, page->size,
+                                   page->buf, page->mapaddr);
+       }
+       kfree(mlxsw_pci->fw_area.items);
+       return err;
+}
+
+/* Issue UNMAP_FA and then free all firmware area pages together with the
+ * item array that tracks them.
+ */
+static void mlxsw_pci_fw_area_fini(struct mlxsw_pci *mlxsw_pci)
+{
+       int idx = 0;
+
+       mlxsw_cmd_unmap_fa(mlxsw_pci->core);
+
+       while (idx < mlxsw_pci->fw_area.num_pages) {
+               struct mlxsw_pci_mem_item *page;
+
+               page = &mlxsw_pci->fw_area.items[idx];
+               pci_free_consistent(mlxsw_pci->pdev, page->size,
+                                   page->buf, page->mapaddr);
+               idx++;
+       }
+       kfree(mlxsw_pci->fw_area.items);
+}
+
+/* Interrupt handler: does no event processing itself, it only schedules
+ * the tasklet of every event queue and reports the interrupt as handled.
+ *
+ * @dev_id: the struct mlxsw_pci passed to request_irq().
+ */
+static irqreturn_t mlxsw_pci_eq_irq_handler(int irq, void *dev_id)
+{
+       struct mlxsw_pci *mlxsw_pci = dev_id;
+       int eqn = 0;
+
+       while (eqn < MLXSW_PCI_EQS_COUNT) {
+               struct mlxsw_pci_queue *eq = mlxsw_pci_eq_get(mlxsw_pci, eqn);
+
+               mlxsw_pci_queue_tasklet_schedule(eq);
+               eqn++;
+       }
+       return IRQ_HANDLED;
+}
+
+/* Bus ->init callback: bring the device's firmware interface up.
+ *
+ * Steps, in order: query the firmware (revision, command interface
+ * revision, doorbell location, number of firmware pages), map the
+ * firmware area, read the board info, apply @profile, initialize the
+ * async queues and request the EQ interrupt.
+ *
+ * The mailbox allocated here is only needed during initialization and is
+ * freed on both the success and the failure path.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set up
+ * before the failing step is undone.
+ */
+static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
+                         const struct mlxsw_config_profile *profile)
+{
+       struct mlxsw_pci *mlxsw_pci = bus_priv;
+       struct pci_dev *pdev = mlxsw_pci->pdev;
+       char *mbox;
+       u16 num_pages;
+       int err;
+
+       mutex_init(&mlxsw_pci->cmd.lock);
+       init_waitqueue_head(&mlxsw_pci->cmd.wait);
+
+       mlxsw_pci->core = mlxsw_core;
+
+       mbox = mlxsw_cmd_mbox_alloc();
+       if (!mbox)
+               return -ENOMEM;
+       err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
+       if (err)
+               goto err_query_fw;
+
+       mlxsw_pci->bus_info.fw_rev.major =
+               mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
+       mlxsw_pci->bus_info.fw_rev.minor =
+               mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
+       mlxsw_pci->bus_info.fw_rev.subminor =
+               mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
+
+       /* Only command interface revision 1 is supported. */
+       if (mlxsw_cmd_mbox_query_fw_cmd_interface_rev_get(mbox) != 1) {
+               dev_err(&pdev->dev, "Unsupported cmd interface revision ID queried from hw\n");
+               err = -EINVAL;
+               goto err_iface_rev;
+       }
+       /* The doorbell page must live in BAR 0. */
+       if (mlxsw_cmd_mbox_query_fw_doorbell_page_bar_get(mbox) != 0) {
+               dev_err(&pdev->dev, "Unsupported doorbell page bar queried from hw\n");
+               err = -EINVAL;
+               goto err_doorbell_page_bar;
+       }
+
+       mlxsw_pci->doorbell_offset =
+               mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox);
+
+       /* Read num_pages now: fw_area_init reuses (and zeroes) the mbox. */
+       num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox);
+       err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages);
+       if (err)
+               goto err_fw_area_init;
+
+       err = mlxsw_pci_boardinfo(mlxsw_pci, mbox);
+       if (err)
+               goto err_boardinfo;
+
+       err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile);
+       if (err)
+               goto err_config_profile;
+
+       err = mlxsw_pci_aqs_init(mlxsw_pci, mbox);
+       if (err)
+               goto err_aqs_init;
+
+       err = request_irq(mlxsw_pci->msix_entry.vector,
+                         mlxsw_pci_eq_irq_handler, 0,
+                         mlxsw_pci_driver_name, mlxsw_pci);
+       if (err) {
+               dev_err(&pdev->dev, "IRQ request failed\n");
+               goto err_request_eq_irq;
+       }
+
+       /* Success: err is 0 here; skip the unwinding but free the mbox. */
+       goto mbox_put;
+
+err_request_eq_irq:
+       mlxsw_pci_aqs_fini(mlxsw_pci);
+err_aqs_init:
+err_config_profile:
+err_boardinfo:
+       mlxsw_pci_fw_area_fini(mlxsw_pci);
+err_fw_area_init:
+err_doorbell_page_bar:
+err_iface_rev:
+err_query_fw:
+mbox_put:
+       mlxsw_cmd_mbox_free(mbox);
+       return err;
+}
+
+/* Bus ->fini callback: undo mlxsw_pci_init() in reverse order - release
+ * the EQ interrupt, tear down the async queues, then unmap and free the
+ * firmware area.
+ */
+static void mlxsw_pci_fini(void *bus_priv)
+{
+       struct mlxsw_pci *mlxsw_pci = bus_priv;
+
+       free_irq(mlxsw_pci->msix_entry.vector, mlxsw_pci);
+       mlxsw_pci_aqs_fini(mlxsw_pci);
+       mlxsw_pci_fw_area_fini(mlxsw_pci);
+}
+
+/* Choose the send descriptor queue for a packet: the local port number
+ * from @tx_info modulo the number of SDQs selects the queue.
+ */
+static struct mlxsw_pci_queue *
+mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci,
+                  const struct mlxsw_tx_info *tx_info)
+{
+       u8 sdqn;
+
+       sdqn = tx_info->local_port % mlxsw_pci_sdq_count(mlxsw_pci);
+       return mlxsw_pci_sdq_get(mlxsw_pci, sdqn);
+}
+
+/* Bus ->skb_transmit callback: post @skb on a send descriptor queue.
+ *
+ * The skb head is mapped into scatter/gather entry 0 of the WQE and page
+ * fragment i into entry i + 1. An skb with more fragments than the WQE
+ * has room for is linearized first. Remaining entries get a byte count
+ * of zero, then the producer counter is advanced and the doorbell rung.
+ *
+ * Returns 0 on success, -EAGAIN when the queue is full, or the error from
+ * skb_linearize() / mlxsw_pci_wqe_frag_map().
+ */
+static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb,
+                                 const struct mlxsw_tx_info *tx_info)
+{
+       struct mlxsw_pci *mlxsw_pci = bus_priv;
+       struct mlxsw_pci_queue *q;
+       struct mlxsw_pci_queue_elem_info *elem_info;
+       char *wqe;
+       int i;
+       int err;
+
+       /* One SG entry is reserved for the skb head. */
+       if (skb_shinfo(skb)->nr_frags > MLXSW_PCI_WQE_SG_ENTRIES - 1) {
+               err = skb_linearize(skb);
+               if (err)
+                       return err;
+       }
+
+       q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info);
+       spin_lock_bh(&q->lock);
+       elem_info = mlxsw_pci_queue_elem_info_producer_get(q);
+       if (!elem_info) {
+               /* queue is full */
+               err = -EAGAIN;
+               goto unlock;
+       }
+       elem_info->u.sdq.skb = skb;
+
+       wqe = elem_info->elem;
+       mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */
+       mlxsw_pci_wqe_lp_set(wqe, !!tx_info->is_emad);
+       mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET);
+
+       /* Nothing is mapped yet if this fails, so no unmapping is needed. */
+       err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
+                                    skb_headlen(skb), DMA_TO_DEVICE);
+       if (err)
+               goto unlock;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, i + 1,
+                                            skb_frag_address(frag),
+                                            skb_frag_size(frag),
+                                            DMA_TO_DEVICE);
+               if (err)
+                       goto unmap_frags;
+       }
+
+       /* Set unused sq entries byte count to zero. */
+       /* i == nr_frags here and entries 0..nr_frags are in use, hence i++. */
+       for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++)
+               mlxsw_pci_wqe_byte_count_set(wqe, i, 0);
+
+       /* Everything is set up, ring producer doorbell to get HW going */
+       q->producer_counter++;
+       mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
+
+       goto unlock;
+
+unmap_frags:
+       /* Mapping entry i + 1 failed, so entries i..0 are the mapped ones. */
+       for (; i >= 0; i--)
+               mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE);
+unlock:
+       spin_unlock_bh(&q->lock);
+       return err;
+}
+
+/* Bus ->cmd_exec callback: execute one command through the command
+ * interface registers (CIR).
+ *
+ * @opcode, @opcode_mod, @in_mod: command identification and modifier.
+ * @out_mbox_direct: the command returns its output in the CIR output
+ *     parameter registers rather than in memory; after a successful
+ *     polled command those registers are copied into @out_mbox.
+ * @in_mbox, @in_mbox_size: optional input mailbox, DMA-mapped to device.
+ * @out_mbox, @out_mbox_size: optional output mailbox, DMA-mapped from
+ *     device.
+ * @p_status: receives the command status reported by the device.
+ *
+ * Commands are serialized by cmd.lock. While cmd.nopoll is false the GO
+ * bit is polled until it clears or the timeout expires; otherwise a
+ * completion event is requested and waited for on cmd.wait.
+ *
+ * Returns 0 on success, -EIO on a DMA mapping failure or a non-zero
+ * command status, -ETIMEDOUT when the command did not complete in time,
+ * or the error from mutex_lock_interruptible().
+ */
+static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
+                             u32 in_mod, bool out_mbox_direct,
+                             char *in_mbox, size_t in_mbox_size,
+                             char *out_mbox, size_t out_mbox_size,
+                             u8 *p_status)
+{
+       struct mlxsw_pci *mlxsw_pci = bus_priv;
+       dma_addr_t in_mapaddr = 0;
+       dma_addr_t out_mapaddr = 0;
+       bool evreq = mlxsw_pci->cmd.nopoll;
+       unsigned long timeout = msecs_to_jiffies(MLXSW_PCI_CIR_TIMEOUT_MSECS);
+       bool *p_wait_done = &mlxsw_pci->cmd.wait_done;
+       int err;
+
+       *p_status = MLXSW_CMD_STATUS_OK;
+
+       err = mutex_lock_interruptible(&mlxsw_pci->cmd.lock);
+       if (err)
+               return err;
+
+       if (in_mbox) {
+               in_mapaddr = pci_map_single(mlxsw_pci->pdev, in_mbox,
+                                           in_mbox_size, PCI_DMA_TODEVICE);
+               if (unlikely(pci_dma_mapping_error(mlxsw_pci->pdev,
+                                                  in_mapaddr))) {
+                       err = -EIO;
+                       goto err_in_mbox_map;
+               }
+       }
+       /* dma_addr_t may be only 32 bits wide; widen it before shifting,
+        * since shifting a 32-bit value by 32 is undefined.
+        */
+       mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_HI,
+                         (u32) ((u64) in_mapaddr >> 32));
+       mlxsw_pci_write32(mlxsw_pci, CIR_IN_PARAM_LO, in_mapaddr);
+
+       if (out_mbox) {
+               out_mapaddr = pci_map_single(mlxsw_pci->pdev, out_mbox,
+                                            out_mbox_size, PCI_DMA_FROMDEVICE);
+               if (unlikely(pci_dma_mapping_error(mlxsw_pci->pdev,
+                                                  out_mapaddr))) {
+                       err = -EIO;
+                       goto err_out_mbox_map;
+               }
+       }
+       mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_HI,
+                         (u32) ((u64) out_mapaddr >> 32));
+       mlxsw_pci_write32(mlxsw_pci, CIR_OUT_PARAM_LO, out_mapaddr);
+
+       mlxsw_pci_write32(mlxsw_pci, CIR_IN_MODIFIER, in_mod);
+       mlxsw_pci_write32(mlxsw_pci, CIR_TOKEN, 0);
+
+       /* Must be cleared before GO is written: the EQ tasklet sets it. */
+       *p_wait_done = false;
+
+       wmb(); /* all needs to be written before we write control register */
+       mlxsw_pci_write32(mlxsw_pci, CIR_CTRL,
+                         MLXSW_PCI_CIR_CTRL_GO_BIT |
+                         (evreq ? MLXSW_PCI_CIR_CTRL_EVREQ_BIT : 0) |
+                         (opcode_mod << MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT) |
+                         opcode);
+
+       if (!evreq) {
+               unsigned long end;
+
+               /* Polling mode: the device clears GO when it is done. */
+               end = jiffies + timeout;
+               do {
+                       u32 ctrl = mlxsw_pci_read32(mlxsw_pci, CIR_CTRL);
+
+                       if (!(ctrl & MLXSW_PCI_CIR_CTRL_GO_BIT)) {
+                               *p_wait_done = true;
+                               *p_status = ctrl >> MLXSW_PCI_CIR_CTRL_STATUS_SHIFT;
+                               break;
+                       }
+                       cond_resched();
+               } while (time_before(jiffies, end));
+       } else {
+               /* Event mode: the EQ tasklet fills cmd.comp and wakes us. */
+               wait_event_timeout(mlxsw_pci->cmd.wait, *p_wait_done, timeout);
+               *p_status = mlxsw_pci->cmd.comp.status;
+       }
+
+       err = 0;
+       if (*p_wait_done) {
+               if (*p_status)
+                       err = -EIO;
+       } else {
+               err = -ETIMEDOUT;
+       }
+
+       if (!err && out_mbox && out_mbox_direct) {
+               /* Some commands do not use the output param as an address
+                * of a mailbox but store their output directly in the
+                * registers. In that case, copy the registers into the
+                * mbox buffer.
+                */
+               __be32 tmp;
+
+               if (!evreq) {
+                       tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
+                                                          CIR_OUT_PARAM_HI));
+                       memcpy(out_mbox, &tmp, sizeof(tmp));
+                       tmp = cpu_to_be32(mlxsw_pci_read32(mlxsw_pci,
+                                                          CIR_OUT_PARAM_LO));
+                       memcpy(out_mbox + sizeof(tmp), &tmp, sizeof(tmp));
+               }
+       }
+
+       if (out_mapaddr)
+               pci_unmap_single(mlxsw_pci->pdev, out_mapaddr, out_mbox_size,
+                                PCI_DMA_FROMDEVICE);
+
+       /* fall through */
+
+err_out_mbox_map:
+       if (in_mapaddr)
+               pci_unmap_single(mlxsw_pci->pdev, in_mapaddr, in_mbox_size,
+                                PCI_DMA_TODEVICE);
+err_in_mbox_map:
+       mutex_unlock(&mlxsw_pci->cmd.lock);
+
+       return err;
+}
+
+/* Bus operations handed to mlxsw core when the PCI device is registered. */
+static const struct mlxsw_bus mlxsw_pci_bus = {
+       .cmd_exec       = mlxsw_pci_cmd_exec,
+       .skb_transmit   = mlxsw_pci_skb_transmit,
+       .fini           = mlxsw_pci_fini,
+       .init           = mlxsw_pci_init,
+       .kind           = "pci",
+};
+
+/* Trigger a software reset of the device by writing the reset bit to the
+ * SW_RESET register. Always returns 0.
+ */
+static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci)
+{
+       const unsigned int settle_msecs = MLXSW_PCI_SW_RESET_TIMEOUT_MSECS;
+
+       mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT);
+
+       /* Current firmware gives no indication of when the reset has
+        * finished, so sleep for a fixed time and hope for the best.
+        */
+       msleep(settle_msecs);
+
+       return 0;
+}
+
+/* PCI probe callback.
+ *
+ * Allocates the driver state, enables the device, claims its regions,
+ * configures the DMA masks (64-bit first, falling back to 32-bit), maps
+ * BAR 0, resets the device, enables a single MSI-X vector, creates the
+ * per-device debugfs directory and registers the device with mlxsw core.
+ *
+ * Returns 0 on success or a negative errno. On failure every step taken
+ * so far is undone and the driver state is freed.
+ */
+static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct mlxsw_pci *mlxsw_pci;
+       int err;
+
+       mlxsw_pci = kzalloc(sizeof(*mlxsw_pci), GFP_KERNEL);
+       if (!mlxsw_pci)
+               return -ENOMEM;
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "pci_enable_device failed\n");
+               goto err_pci_enable_device;
+       }
+
+       err = pci_request_regions(pdev, mlxsw_pci_driver_name);
+       if (err) {
+               dev_err(&pdev->dev, "pci_request_regions failed\n");
+               goto err_pci_request_regions;
+       }
+
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (!err) {
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+               if (err) {
+                       dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n");
+                       goto err_pci_set_dma_mask;
+               }
+       } else {
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+               if (err) {
+                       dev_err(&pdev->dev, "pci_set_dma_mask failed\n");
+                       goto err_pci_set_dma_mask;
+               }
+       }
+
+       if (pci_resource_len(pdev, 0) < MLXSW_PCI_BAR0_SIZE) {
+               dev_err(&pdev->dev, "invalid PCI region size\n");
+               err = -EINVAL;
+               goto err_pci_resource_len_check;
+       }
+
+       mlxsw_pci->hw_addr = ioremap(pci_resource_start(pdev, 0),
+                                    pci_resource_len(pdev, 0));
+       if (!mlxsw_pci->hw_addr) {
+               dev_err(&pdev->dev, "ioremap failed\n");
+               err = -EIO;
+               goto err_ioremap;
+       }
+       pci_set_master(pdev);
+
+       mlxsw_pci->pdev = pdev;
+       pci_set_drvdata(pdev, mlxsw_pci);
+
+       err = mlxsw_pci_sw_reset(mlxsw_pci);
+       if (err) {
+               dev_err(&pdev->dev, "Software reset failed\n");
+               goto err_sw_reset;
+       }
+
+       err = pci_enable_msix_exact(pdev, &mlxsw_pci->msix_entry, 1);
+       if (err) {
+               dev_err(&pdev->dev, "MSI-X init failed\n");
+               goto err_msix_init;
+       }
+
+       mlxsw_pci->bus_info.device_kind = mlxsw_pci_device_kind_get(id);
+       mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
+       mlxsw_pci->bus_info.dev = &pdev->dev;
+
+       mlxsw_pci->dbg_dir = debugfs_create_dir(mlxsw_pci->bus_info.device_name,
+                                               mlxsw_pci_dbg_root);
+       if (!mlxsw_pci->dbg_dir) {
+               dev_err(&pdev->dev, "Failed to create debugfs dir\n");
+               /* err is still 0 from the MSI-X setup; without an explicit
+                * error code probe would report success after having freed
+                * everything below.
+                */
+               err = -ENOMEM;
+               goto err_dbg_create_dir;
+       }
+
+       err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info,
+                                            &mlxsw_pci_bus, mlxsw_pci);
+       if (err) {
+               dev_err(&pdev->dev, "cannot register bus device\n");
+               goto err_bus_device_register;
+       }
+
+       return 0;
+
+err_bus_device_register:
+       debugfs_remove_recursive(mlxsw_pci->dbg_dir);
+err_dbg_create_dir:
+       pci_disable_msix(mlxsw_pci->pdev);
+err_msix_init:
+err_sw_reset:
+       iounmap(mlxsw_pci->hw_addr);
+err_ioremap:
+err_pci_resource_len_check:
+err_pci_set_dma_mask:
+       pci_release_regions(pdev);
+err_pci_request_regions:
+       pci_disable_device(pdev);
+err_pci_enable_device:
+       kfree(mlxsw_pci);
+       return err;
+}
+
+/* PCI remove callback: undo mlxsw_pci_probe() in reverse order. The core
+ * is unregistered first, while the device is still fully set up, and the
+ * driver state is freed last.
+ */
+static void mlxsw_pci_remove(struct pci_dev *pdev)
+{
+       struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);
+
+       mlxsw_core_bus_device_unregister(mlxsw_pci->core);
+       debugfs_remove_recursive(mlxsw_pci->dbg_dir);
+       pci_disable_msix(mlxsw_pci->pdev);
+       iounmap(mlxsw_pci->hw_addr);
+       pci_release_regions(mlxsw_pci->pdev);
+       pci_disable_device(mlxsw_pci->pdev);
+       kfree(mlxsw_pci);
+}
+
+/* PCI driver description registered at module init. */
+static struct pci_driver mlxsw_pci_driver = {
+       .remove         = mlxsw_pci_remove,
+       .probe          = mlxsw_pci_probe,
+       .id_table       = mlxsw_pci_id_table,
+       .name           = mlxsw_pci_driver_name,
+};
+
+/* Module init: create the driver's debugfs root directory, then register
+ * the PCI driver. The directory is removed again if registration fails.
+ *
+ * Returns 0 on success, -ENOMEM if the debugfs directory cannot be
+ * created, or the error from pci_register_driver().
+ */
+static int __init mlxsw_pci_module_init(void)
+{
+       int err;
+
+       mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL);
+       if (!mlxsw_pci_dbg_root)
+               return -ENOMEM;
+
+       err = pci_register_driver(&mlxsw_pci_driver);
+       if (err)
+               debugfs_remove_recursive(mlxsw_pci_dbg_root);
+
+       return err;
+}
+
+/* Module exit: unregister the PCI driver first, then remove the driver's
+ * debugfs root - the reverse of mlxsw_pci_module_init().
+ */
+static void __exit mlxsw_pci_module_exit(void)
+{
+       pci_unregister_driver(&mlxsw_pci_driver);
+       debugfs_remove_recursive(mlxsw_pci_dbg_root);
+}
+
+/* Module entry/exit points and module metadata. */
+module_init(mlxsw_pci_module_init);
+module_exit(mlxsw_pci_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox switch PCI interface driver");
+/* Export the PCI ID table in the module's device table. */
+MODULE_DEVICE_TABLE(pci, mlxsw_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
new file mode 100644 (file)
index 0000000..6176a93
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/pci.h
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_PCI_H
+#define _MLXSW_PCI_H
+
+#include <linux/bitops.h>
+
+#include "item.h"
+
+/* MLXSW_PCI_PAGE_SIZE is the page unit used for the queue sizing below
+ * (see MLXSW_PCI_AQ_SIZE).
+ * NOTE(review): MLXSW_PCI_BAR0_SIZE is presumably the expected length of
+ * PCI BAR 0 - confirm against its user in pci.c.
+ */
+#define MLXSW_PCI_BAR0_SIZE            (1024 * 1024) /* 1MB */
+#define MLXSW_PCI_PAGE_SIZE            4096
+
+/* CIR register block: offsets of the individual registers relative to
+ * MLXSW_PCI_CIR_BASE, bit and shift constants named after the CTRL
+ * register, and a timeout in milliseconds.
+ * NOTE(review): CIR presumably stands for Command Interface Register and
+ * the GO/EVREQ/status semantics are defined by the users in pci.c -
+ * confirm there.
+ */
+#define MLXSW_PCI_CIR_BASE                     0x71000
+#define MLXSW_PCI_CIR_IN_PARAM_HI              MLXSW_PCI_CIR_BASE
+#define MLXSW_PCI_CIR_IN_PARAM_LO              (MLXSW_PCI_CIR_BASE + 0x04)
+#define MLXSW_PCI_CIR_IN_MODIFIER              (MLXSW_PCI_CIR_BASE + 0x08)
+#define MLXSW_PCI_CIR_OUT_PARAM_HI             (MLXSW_PCI_CIR_BASE + 0x0C)
+#define MLXSW_PCI_CIR_OUT_PARAM_LO             (MLXSW_PCI_CIR_BASE + 0x10)
+#define MLXSW_PCI_CIR_TOKEN                    (MLXSW_PCI_CIR_BASE + 0x14)
+#define MLXSW_PCI_CIR_CTRL                     (MLXSW_PCI_CIR_BASE + 0x18)
+#define MLXSW_PCI_CIR_CTRL_GO_BIT              BIT(23)
+#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT           BIT(22)
+#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT    12
+#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT                24
+#define MLXSW_PCI_CIR_TIMEOUT_MSECS            1000
+
+/* Software reset register offset, its reset bit, and a timeout in
+ * milliseconds.
+ * NOTE(review): presumably the timeout bounds the wait for the reset to
+ * complete - confirm against the reset code in pci.c.
+ */
+#define MLXSW_PCI_SW_RESET                     0xF0010
+#define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       5000
+
+/* Doorbell offsets, one per doorbell type, spaced 0x200 apart. */
+#define MLXSW_PCI_DOORBELL_SDQ_OFFSET          0x000
+#define MLXSW_PCI_DOORBELL_RDQ_OFFSET          0x200
+#define MLXSW_PCI_DOORBELL_CQ_OFFSET           0x400
+#define MLXSW_PCI_DOORBELL_EQ_OFFSET           0x600
+#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET       0x800
+#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET       0xA00
+
+/* Computes a doorbell location as @offset + @type_offset + 4 * @num.
+ * NOTE(review): @offset is presumably the base of the doorbell area,
+ * @type_offset one of the MLXSW_PCI_DOORBELL_*_OFFSET values and @num the
+ * queue number - confirm against the callers in pci.c.
+ */
+#define MLXSW_PCI_DOORBELL(offset, type_offset, num)   \
+       ((offset) + (type_offset) + (num) * 4)
+
+/* Number of queues of each type. The CQ count is the sum of the RDQ and
+ * SDQ counts (48). MLXSW_PCI_EQ_ASYNC_NUM and MLXSW_PCI_EQ_COMP_NUM are
+ * the numbers of the two EQs.
+ * NOTE(review): presumably EQ 0 carries asynchronous events and EQ 1
+ * completion events - confirm against the EQ handling in pci.c.
+ */
+#define MLXSW_PCI_RDQS_COUNT   24
+#define MLXSW_PCI_SDQS_COUNT   24
+#define MLXSW_PCI_CQS_COUNT    (MLXSW_PCI_RDQS_COUNT + MLXSW_PCI_SDQS_COUNT)
+#define MLXSW_PCI_EQS_COUNT    2
+#define MLXSW_PCI_EQ_ASYNC_NUM 0
+#define MLXSW_PCI_EQ_COMP_NUM  1
+
+/* Queue sizing. MLXSW_PCI_AQ_SIZE is 8 pages of 4096 bytes (32768 bytes);
+ * the element counts are that size divided by the element size, i.e.
+ * 1024 WQEs, 2048 CQEs and 2048 EQEs.
+ * NOTE(review): MLXSW_PCI_EQE_UPDATE_COUNT (128) is presumably the number
+ * of processed EQEs between consumer index updates - confirm in pci.c.
+ */
+#define MLXSW_PCI_AQ_PAGES     8
+#define MLXSW_PCI_AQ_SIZE      (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
+#define MLXSW_PCI_WQE_SIZE     32 /* 32 bytes per element */
+#define MLXSW_PCI_CQE_SIZE     16 /* 16 bytes per element */
+#define MLXSW_PCI_EQE_SIZE     16 /* 16 bytes per element */
+#define MLXSW_PCI_WQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
+#define MLXSW_PCI_CQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
+#define MLXSW_PCI_EQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
+#define MLXSW_PCI_EQE_UPDATE_COUNT     0x80
+
+/* WQE constants: number of scatter/gather entries per WQE and the
+ * Ethernet value for the pci_wqe_type field.
+ */
+#define MLXSW_PCI_WQE_SG_ENTRIES       3
+#define MLXSW_PCI_WQE_TYPE_ETHERNET    0xA
+
+/* pci_wqe_c
+ * If set, it indicates that a completion should be reported upon
+ * execution of this descriptor.
+ */
+MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1);
+
+/* pci_wqe_lp
+ * Local Processing, set if packet should be processed by the local
+ * switch hardware:
+ * For Ethernet EMAD (Direct Route and non Direct Route) -
+ * must be set if packet destination is local device
+ * For InfiniBand CTL - must be set if packet destination is local device
+ * Otherwise it must be clear
+ * Local Process packets must not exceed the size of 2K (including payload
+ * and headers).
+ */
+MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
+
+/* pci_wqe_type
+ * Packet type. MLXSW_PCI_WQE_TYPE_ETHERNET is the only type value
+ * defined in this header.
+ */
+MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
+
+/* pci_wqe_byte_count
+ * Size of i-th scatter/gather entry, 0 if entry is unused.
+ * NOTE(review): presumably indexed over MLXSW_PCI_WQE_SG_ENTRIES
+ * entries - confirm against the users in pci.c.
+ */
+MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
+
+/* pci_wqe_address
+ * Physical address of i-th scatter/gather entry.
+ * Gather entries must be 2-byte aligned.
+ */
+MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
+
+/* pci_cqe_lag
+ * Set when the packet arrived from a port which is a LAG.
+ */
+MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
+
+/* pci_cqe_system_port
+ * When lag=0: System port on which the packet was received
+ * When lag=1:
+ * bits [15:4] LAG ID on which the packet was received
+ * bits [3:0] sub_port on which the packet was received
+ */
+MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
+
+/* pci_cqe_wqe_counter
+ * WQE count of the WQEs completed on the associated dqn
+ */
+MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
+
+/* pci_cqe_byte_count
+ * Byte count of received packets, including two additional
+ * Reserved Bytes that are appended to the end of the frame.
+ * Reserved for Send CQE.
+ */
+MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
+
+/* pci_cqe_trap_id
+ * Trap ID that captured the packet.
+ */
+MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8);
+
+/* pci_cqe_e
+ * CQE with Error.
+ */
+MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
+
+/* pci_cqe_sr
+ * 1 - Send Queue
+ * 0 - Receive Queue
+ */
+MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
+
+/* pci_cqe_dqn
+ * Descriptor Queue (DQ) Number.
+ */
+MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
+
+/* pci_cqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
+
+/* pci_eqe_event_type
+ * Event type. The MLXSW_PCI_EQE_EVENT_TYPE_* values below are the types
+ * defined in this header.
+ */
+MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
+#define MLXSW_PCI_EQE_EVENT_TYPE_COMP  0x00
+#define MLXSW_PCI_EQE_EVENT_TYPE_CMD   0x0A
+
+/* pci_eqe_event_sub_type
+ * Event sub-type.
+ */
+MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
+
+/* pci_eqe_cqn
+ * Completion Queue that triggered this EQE.
+ */
+MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
+
+/* pci_eqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
+
+/* Command completion event data.
+ *
+ * The event data occupies the first three dwords (0x00-0x0B) of the
+ * 16-byte EQE (MLXSW_PCI_EQE_SIZE); the last dword (0x0C) holds the event
+ * type, sub-type, CQ number and owner bit. The command fields therefore
+ * must not be placed at 0x08-0x13: that would alias the last dword and
+ * run past the end of the element.
+ */
+
+/* pci_eqe_cmd_token
+ * Command completion event - token
+ */
+MLXSW_ITEM32(pci, eqe, cmd_token, 0x00, 16, 16);
+
+/* pci_eqe_cmd_status
+ * Command completion event - status
+ */
+MLXSW_ITEM32(pci, eqe, cmd_status, 0x00, 0, 8);
+
+/* pci_eqe_cmd_out_param_h
+ * Command completion event - output parameter - higher part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x04, 0, 32);
+
+/* pci_eqe_cmd_out_param_l
+ * Command completion event - output parameter - lower part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x08, 0, 32);
+
+#endif