IB/mlx5: LAG QP load balancing
authorAviv Heller <avivh@mellanox.com>
Sun, 18 Sep 2016 17:48:04 +0000 (20:48 +0300)
committerDoug Ledford <dledford@redhat.com>
Fri, 7 Oct 2016 20:54:22 +0000 (16:54 -0400)
When LAG is active, QP tx affinity (the physical port
to which a QP is affined, or the TIS in the case of raw-eth)
is set in a round-robin fashion during the state transition
from RESET to INIT.

Signed-off-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c

index 53e1f1d..40fe1a6 100644 (file)
@@ -605,6 +605,7 @@ struct mlx5_roce {
        rwlock_t                netdev_lock;
        struct net_device       *netdev;
        struct notifier_block   nb;
+       atomic_t                next_port;
 };
 
 struct mlx5_ib_dev {
index cf181ef..2ec88c6 100644 (file)
@@ -1873,7 +1873,8 @@ static void get_cqs(enum ib_qp_type qp_type,
 }
 
 static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                               const struct mlx5_modify_raw_qp_param *raw_qp_param);
+                               const struct mlx5_modify_raw_qp_param *raw_qp_param,
+                               u8 lag_tx_affinity);
 
 static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 {
@@ -1902,7 +1903,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
                                .operation = MLX5_CMD_OP_2RST_QP
                        };
 
-                       err = modify_raw_packet_qp(dev, qp, &raw_qp_param);
+                       err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0);
                }
                if (err)
                        mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
@@ -2166,6 +2167,31 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
        return err;
 }
 
+static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
+                                        struct mlx5_ib_sq *sq, u8 tx_affinity)
+{      /* Write tx_affinity into lag_tx_port_affinity of the TIS sq->tisn; returns -ENOMEM on allocation failure, else the mlx5_core_modify_tis() result. */
+       void *in;       /* modify_tis_in command buffer */
+       void *tisc;     /* points at the ctx member inside 'in' */
+       int inlen;
+       int err;
+
+       inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+       in = mlx5_vzalloc(inlen);       /* NOTE(review): presumably zero-filled, so unset fields stay 0 - confirm */
+       if (!in)
+               return -ENOMEM;
+
+       MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);   /* NOTE(review): presumably selects this field as the one to modify - confirm against the PRM */
+
+       tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+       MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);        /* the new affinity value */
+
+       err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);   /* targets the TIS numbered sq->tisn */
+
+       kvfree(in);     /* buffer is released whether or not the command succeeded */
+
+       return err;
+}
+
 static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                         const struct ib_ah_attr *ah,
                         struct mlx5_qp_path *path, u8 port, int attr_mask,
@@ -2446,7 +2472,8 @@ out:
 }
 
 static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-                               const struct mlx5_modify_raw_qp_param *raw_qp_param)
+                               const struct mlx5_modify_raw_qp_param *raw_qp_param,
+                               u8 tx_affinity)
 {
        struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
        struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
@@ -2487,8 +2514,16 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
                        return err;
        }
 
-       if (qp->sq.wqe_cnt)
+       if (qp->sq.wqe_cnt) {
+               if (tx_affinity) {
+                       err = modify_raw_packet_tx_affinity(dev->mdev, sq,
+                                                           tx_affinity);
+                       if (err)
+                               return err;
+               }
+
                return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+       }
 
        return 0;
 }
@@ -2547,6 +2582,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
        int mlx5_st;
        int err;
        u16 op;
+       u8 tx_affinity = 0;
 
        context = kzalloc(sizeof(*context), GFP_KERNEL);
        if (!context)
@@ -2576,6 +2612,23 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                }
        }
 
+       if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
+               if ((ibqp->qp_type == IB_QPT_RC) ||
+                   (ibqp->qp_type == IB_QPT_UD &&
+                    !(qp->flags & MLX5_IB_QP_SQPN_QP1)) ||
+                   (ibqp->qp_type == IB_QPT_UC) ||
+                   (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
+                   (ibqp->qp_type == IB_QPT_XRC_INI) ||
+                   (ibqp->qp_type == IB_QPT_XRC_TGT)) {
+                       if (mlx5_lag_is_active(dev->mdev)) {
+                               tx_affinity = (unsigned int)atomic_add_return(1,
+                                               &dev->roce.next_port) %
+                                               MLX5_MAX_PORTS + 1;
+                               context->flags |= cpu_to_be32(tx_affinity << 24);
+                       }
+               }
+       }
+
        if (is_sqp(ibqp->qp_type)) {
                context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
        } else if (ibqp->qp_type == IB_QPT_UD ||
@@ -2724,7 +2777,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                        raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
                        raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
                }
-               err = modify_raw_packet_qp(dev, qp, &raw_qp_param);
+               err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
        } else {
                err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
                                          &base->mqp);