Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e6fdc82..2217477 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -37,7 +37,6 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
-#include <linux/io-mapping.h>
 #if defined(CONFIG_X86)
 #include <asm/pat.h>
 #endif
@@ -54,7 +53,6 @@
 #include <linux/in.h>
 #include <linux/etherdevice.h>
 #include <linux/mlx5/fs.h>
-#include "user.h"
 #include "mlx5_ib.h"
 
 #define DRIVER_NAME "mlx5_ib"
@@ -107,13 +105,42 @@ static int mlx5_netdev_event(struct notifier_block *this,
        struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
                                                 roce.nb);
 
-       if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
-               return NOTIFY_DONE;
+       switch (event) {
+       case NETDEV_REGISTER:
+       case NETDEV_UNREGISTER:
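+               /* Track only the netdev belonging to this HCA's PCI device */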
+               write_lock(&ibdev->roce.netdev_lock);
+               if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+                       ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
+                                            NULL : ndev;
+               write_unlock(&ibdev->roce.netdev_lock);
+               break;
+
+       case NETDEV_UP:
+       case NETDEV_DOWN: {
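+               /* Under LAG, port state follows the bond (upper) device */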
+               struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
+               struct net_device *upper = NULL;
+
+               if (lag_ndev) {
+                       upper = netdev_master_upper_dev_get(lag_ndev);
+                       dev_put(lag_ndev);
+               }
+
+               if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
+                   && ibdev->ib_active) {
+                       struct ib_event ibev = {0};
 
-       write_lock(&ibdev->roce.netdev_lock);
-       if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
-               ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
-       write_unlock(&ibdev->roce.netdev_lock);
+                       ibev.device = &ibdev->ib_dev;
+                       ibev.event = (event == NETDEV_UP) ?
+                                    IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+                       ibev.element.port_num = 1;
+                       ib_dispatch_event(&ibev);
+               }
+               break;
+       }
+
+       default:
+               break;
+       }
 
        return NOTIFY_DONE;
 }
@@ -124,6 +151,10 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
        struct mlx5_ib_dev *ibdev = to_mdev(device);
        struct net_device *ndev;
 
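+       /* Under LAG, this returns the active slave with a reference held */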
+       ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
+       if (ndev)
+               return ndev;
+
        /* Ensure ndev does not disappear before we invoke dev_hold()
         */
        read_lock(&ibdev->roce.netdev_lock);
@@ -139,7 +170,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
                                struct ib_port_attr *props)
 {
        struct mlx5_ib_dev *dev = to_mdev(device);
-       struct net_device *ndev;
+       struct net_device *ndev, *upper;
        enum ib_mtu ndev_ib_mtu;
        u16 qkey_viol_cntr;
 
@@ -163,6 +194,17 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
        if (!ndev)
                return 0;
 
+       if (mlx5_lag_is_active(dev->mdev)) {
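+               /* Report the bond master's state rather than the slave's */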
+               rcu_read_lock();
+               upper = netdev_master_upper_dev_get_rcu(ndev);
+               if (upper) {
+                       dev_put(ndev);
+                       ndev = upper;
+                       dev_hold(ndev);
+               }
+               rcu_read_unlock();
+       }
+
        if (netif_running(ndev) && netif_carrier_ok(ndev)) {
                props->state      = IB_PORT_ACTIVE;
                props->phys_state = 5;
@@ -285,7 +327,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-       return !MLX5_CAP_GEN(dev->mdev, ib_virt);
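+       /* MAD ifc queries apply only to IB ports; never used for RoCE */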
+       if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+               return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+       return 0;
 }
 
 enum {
@@ -428,7 +472,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
 }
 
 struct mlx5_reg_node_desc {
-       u8      desc[64];
+       u8      desc[IB_DEVICE_NODE_DESC_MAX];
 };
 
 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
@@ -876,13 +920,13 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
         * If possible, pass node desc to FW, so it can generate
         * a 144 trap.  If cmd fails, just ignore.
         */
-       memcpy(&in, props->node_desc, 64);
+       memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
        err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
                                   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
        if (err)
                return err;
 
-       memcpy(ibdev->node_desc, props->node_desc, 64);
+       memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
 
        return err;
 }
@@ -1425,21 +1469,51 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
        return 0;
 }
 
-static bool outer_header_zero(u32 *match_criteria)
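+/* Bit positions within the FW match_criteria_enable field */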
+enum {
+       MATCH_CRITERIA_ENABLE_OUTER_BIT,
+       MATCH_CRITERIA_ENABLE_MISC_BIT,
+       MATCH_CRITERIA_ENABLE_INNER_BIT
+};
+
+#define HEADER_IS_ZERO(match_criteria, headers)                                   \
+       !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+                   0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
+
+static u8 get_match_criteria_enable(u32 *match_criteria)
 {
-       int size = MLX5_ST_SZ_BYTES(fte_match_param);
-       char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria,
-                                            outer_headers);
+       u8 match_criteria_enable;
+
+       match_criteria_enable =
+               (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+               MATCH_CRITERIA_ENABLE_OUTER_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+               MATCH_CRITERIA_ENABLE_MISC_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+               MATCH_CRITERIA_ENABLE_INNER_BIT;
 
-       return outer_headers_c[0] == 0 && !memcmp(outer_headers_c,
-                                                 outer_headers_c + 1,
-                                                 size - 1);
+       return match_criteria_enable;
+}
+
+static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+}
+
+static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
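+       /* TOS = DSCP (high 6 bits) | ECN (low 2 bits); set both HW fields */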
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
 }
 
 #define LAST_ETH_FIELD vlan_tag
 #define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD dst_ip
-#define LAST_IPV6_FIELD dst_ip
+#define LAST_IPV4_FIELD tos
+#define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
 
 /* Field is the last supported field */
@@ -1457,6 +1531,11 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                                             outer_headers);
        void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                                             outer_headers);
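+       /* misc_parameters holds fields beyond L2-L4, e.g. the IPv6 flow label */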
+       void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+                                          misc_parameters);
+       void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+                                          misc_parameters);
+
        switch (ib_spec->type) {
        case IB_FLOW_SPEC_ETH:
                if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
@@ -1469,6 +1548,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                                             dmac_47_16),
                                ib_spec->eth.val.dst_mac);
 
+               ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+                                            smac_47_16),
+                               ib_spec->eth.mask.src_mac);
+               ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+                                            smac_47_16),
+                               ib_spec->eth.val.src_mac);
+
                if (ib_spec->eth.mask.vlan_tag) {
                        MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
                                 vlan_tag, 1);
@@ -1524,6 +1610,12 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &ib_spec->ipv4.val.dst_ip,
                       sizeof(ib_spec->ipv4.val.dst_ip));
+
+               set_tos(outer_headers_c, outer_headers_v,
+                       ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+
+               set_proto(outer_headers_c, outer_headers_v,
+                         ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
                break;
        case IB_FLOW_SPEC_IPV6:
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
@@ -1550,6 +1642,21 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &ib_spec->ipv6.val.dst_ip,
                       sizeof(ib_spec->ipv6.val.dst_ip));
+
+               set_tos(outer_headers_c, outer_headers_v,
+                       ib_spec->ipv6.mask.traffic_class,
+                       ib_spec->ipv6.val.traffic_class);
+
+               set_proto(outer_headers_c, outer_headers_v,
+                         ib_spec->ipv6.mask.next_hdr,
+                         ib_spec->ipv6.val.next_hdr);
+
+               MLX5_SET(fte_match_set_misc, misc_params_c,
+                        outer_ipv6_flow_label,
+                        ntohl(ib_spec->ipv6.mask.flow_label));
+               MLX5_SET(fte_match_set_misc, misc_params_v,
+                        outer_ipv6_flow_label,
+                        ntohl(ib_spec->ipv6.val.flow_label));
                break;
        case IB_FLOW_SPEC_TCP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
@@ -1797,9 +1904,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
                ib_flow += ((union ib_flow_spec *)ib_flow)->size;
        }
 
-       /* Outer header support only */
-       spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria))
-               << 0;
+       spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
        action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
                MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        handler->rule = mlx5_add_flow_rule(ft, spec,
@@ -1953,6 +2058,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                                           int domain)
 {
        struct mlx5_ib_dev *dev = to_mdev(qp->device);
+       struct mlx5_ib_qp *mqp = to_mqp(qp);
        struct mlx5_ib_flow_handler *handler = NULL;
        struct mlx5_flow_destination *dst = NULL;
        struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
@@ -1988,7 +2094,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
        }
 
        dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
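+       /* RSS QPs have their own TIR; raw packet QPs use the RQ's TIR */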
+       if (mqp->flags & MLX5_IB_QP_RSS)
+               dst->tir_num = mqp->rss_qp.tirn;
+       else
+               dst->tir_num = mqp->raw_packet_qp.rq.tirn;
 
        if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
                if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
@@ -2213,14 +2322,19 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
                break;
 
        case MLX5_DEV_EVENT_PORT_UP:
-               ibev.event = IB_EVENT_PORT_ACTIVE;
-               port = (u8)param;
-               break;
-
        case MLX5_DEV_EVENT_PORT_DOWN:
        case MLX5_DEV_EVENT_PORT_INITIALIZED:
-               ibev.event = IB_EVENT_PORT_ERR;
                port = (u8)param;
+
+               /* In RoCE, port up/down events are handled in
+                * mlx5_netdev_event().
+                */
+               if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+                       IB_LINK_LAYER_ETHERNET)
+                       return;
+
+               ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
+                            IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
                break;
 
        case MLX5_DEV_EVENT_LID_CHANGE:
@@ -2625,30 +2739,88 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str,
                       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
 }
 
+static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_core_dev *mdev = dev->mdev;
+       struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
+                                                                MLX5_FLOW_NAMESPACE_LAG);
+       struct mlx5_flow_table *ft;
+       int err;
+
+       if (!ns || !mlx5_lag_is_active(mdev))
+               return 0;
+
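+       /* The CREATE_VPORT_LAG FW command merges RX RoCE steering of both ports */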
+       err = mlx5_cmd_create_vport_lag(mdev);
+       if (err)
+               return err;
+
+       ft = mlx5_create_lag_demux_flow_table(ns, 0, 0);
+       if (IS_ERR(ft)) {
+               err = PTR_ERR(ft);
+               goto err_destroy_vport_lag;
+       }
+
+       dev->flow_db.lag_demux_ft = ft;
+       return 0;
+
+err_destroy_vport_lag:
+       mlx5_cmd_destroy_vport_lag(mdev);
+       return err;
+}
+
+static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_core_dev *mdev = dev->mdev;
+
+       if (dev->flow_db.lag_demux_ft) {
+               mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
+               dev->flow_db.lag_demux_ft = NULL;
+
+               mlx5_cmd_destroy_vport_lag(mdev);
+       }
+}
+
+static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
+{
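+       /* notifier_call doubles as a "registered" flag; safe to call twice */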
+       if (dev->roce.nb.notifier_call) {
+               unregister_netdevice_notifier(&dev->roce.nb);
+               dev->roce.nb.notifier_call = NULL;
+       }
+}
+
 static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
 {
        int err;
 
        dev->roce.nb.notifier_call = mlx5_netdev_event;
        err = register_netdevice_notifier(&dev->roce.nb);
-       if (err)
+       if (err) {
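+               /* Clear the flag so mlx5_remove_roce_notifier() is a no-op */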
+               dev->roce.nb.notifier_call = NULL;
                return err;
+       }
 
        err = mlx5_nic_vport_enable_roce(dev->mdev);
        if (err)
                goto err_unregister_netdevice_notifier;
 
+       err = mlx5_roce_lag_init(dev);
+       if (err)
+               goto err_disable_roce;
+
        return 0;
 
+err_disable_roce:
+       mlx5_nic_vport_disable_roce(dev->mdev);
+
 err_unregister_netdevice_notifier:
-       unregister_netdevice_notifier(&dev->roce.nb);
+       mlx5_remove_roce_notifier(dev);
        return err;
 }
 
 static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
 {
+       mlx5_roce_lag_cleanup(dev);
        mlx5_nic_vport_disable_roce(dev->mdev);
-       unregister_netdevice_notifier(&dev->roce.nb);
 }
 
 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
@@ -2763,6 +2935,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        struct mlx5_ib_dev *dev;
        enum rdma_link_layer ll;
        int port_type_cap;
+       const char *name;
        int err;
        int i;
 
@@ -2795,7 +2968,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 
        MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
 
-       strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
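+       /* A bonded pair registers a single IB device, named mlx5_bond_%d */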
+       if (!mlx5_lag_is_active(mdev))
+               name = "mlx5_%d";
+       else
+               name = "mlx5_bond_%d";
+
+       strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
        dev->ib_dev.owner               = THIS_MODULE;
        dev->ib_dev.node_type           = RDMA_NODE_IB_CA;
        dev->ib_dev.local_dma_lkey      = 0 /* not supported for now */;
@@ -2997,8 +3175,10 @@ err_rsrc:
        destroy_dev_resources(&dev->devr);
 
 err_disable_roce:
-       if (ll == IB_LINK_LAYER_ETHERNET)
+       if (ll == IB_LINK_LAYER_ETHERNET) {
                mlx5_disable_roce(dev);
+               mlx5_remove_roce_notifier(dev);
+       }
 
 err_free_port:
        kfree(dev->port);
@@ -3014,6 +3194,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
        struct mlx5_ib_dev *dev = context;
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
 
+       mlx5_remove_roce_notifier(dev);
        ib_unregister_device(&dev->ib_dev);
        mlx5_ib_dealloc_q_counters(dev);
        destroy_umrc_res(dev);