drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c

   1 /*
   2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
   3  * driver for Linux.
   4  *
   5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35
  36 #include <linux/module.h>
  37 #include <linux/moduleparam.h>
  38 #include <linux/init.h>
  39 #include <linux/pci.h>
  40 #include <linux/dma-mapping.h>
  41 #include <linux/netdevice.h>
  42 #include <linux/etherdevice.h>
  43 #include <linux/debugfs.h>
  44 #include <linux/ethtool.h>
  45
  46 #include "t4vf_common.h"
  47 #include "t4vf_defs.h"
  48
  49 #include "../cxgb4/t4_regs.h"
  50 #include "../cxgb4/t4_msg.h"
  51
/*
 * Generic information about the driver: DRV_VERSION is the driver's version
 * string and DRV_DESC is its one-line, human-readable description.
 * NOTE(review): the users of these strings (presumably the MODULE_*() macros
 * and ethtool driver-info reporting) are outside this view -- confirm.
 */
#define DRV_VERSION "1.0.0"
#define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver"
  57
  58 /*
  59  * Module Parameters.
  60  * ==================
  61  */
  62
/*
 * Default ethtool "message level" for adapters: a bitmap of NETIF_MSG_*
 * categories covering driver, probe, link, timer, interface down/up and
 * RX/TX error messages.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
                         NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
                         NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)

/*
 * Module parameter (mode 0644) holding the default message level bitmap.
 * NOTE(review): the point at which this is copied into an adapter is outside
 * this view.
 */
static int dflt_msg_enable = DFLT_MSG_ENABLE;

module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable,
                 "default adapter ethtool message level bitmap");
  75
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX        2
#define MSI_MSI         1
#define MSI_DEFAULT     MSI_MSIX

/*
 * Interrupt scheme selector, exported as a module parameter (mode 0644).
 * NOTE(review): the code which interprets this value is outside this view.
 */
static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
  96
  97 /*
  98  * Fundamental constants.
  99  * ======================
 100  */
 101
enum {
        /*
         * Largest and smallest sizes for TX Queues, Response Queues and
         * Free Lists (going by the names; the code which enforces these
         * limits is outside this view).
         */
        MAX_TXQ_ENTRIES         = 16384,
        MAX_RSPQ_ENTRIES        = 16384,
        MAX_RX_BUFFERS          = 16384,

        MIN_TXQ_ENTRIES         = 32,
        MIN_RSPQ_ENTRIES        = 128,
        MIN_FL_ENTRIES          = 16,

        /*
         * For purposes of manipulating the Free List size we need to
         * recognize that Free Lists are actually Egress Queues (the host
         * produces free buffers which the hardware consumes), that Egress
         * Queue indices are all in units of Egress Context Unit bytes, and
         * that Free List entries are 64-bit PCI DMA addresses.  And since
         * the state of the Producer Index == the Consumer Index implies an
         * EMPTY list, we always have at least one Egress Unit's worth of
         * Free List entries unused.  See sge.c for more details ...
         */
        EQ_UNIT = SGE_EQ_IDXSIZE,                       /* bytes per Egress Queue Unit */
        FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),      /* Free List entries per unit */
        MIN_FL_RESID = FL_PER_EQ_UNIT,                  /* entries always left unused */
};
 125
 126 /*
 127  * Global driver state.
 128  * ====================
 129  */
 130
/*
 * NOTE(review): presumably the driver's top-level debugfs directory; the code
 * which creates and removes it is outside this view -- confirm.
 */
static struct dentry *cxgb4vf_debugfs_root;
 132
 133 /*
 134  * OS "Callback" functions.
 135  * ========================
 136  */
 137
 138 /*
 139  * The link status has changed on the indicated "port" (Virtual Interface).
 140  */
 141 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 142 {
 143         struct net_device *dev = adapter->port[pidx];
 144
 145         /*
 146          * If the port is disabled or the current recorded "link up"
 147          * status matches the new status, just return.
 148          */
 149         if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
 150                 return;
 151
 152         /*
 153          * Tell the OS that the link status has changed and print a short
 154          * informative message on the console about the event.
 155          */
 156         if (link_ok) {
 157                 const char *s;
 158                 const char *fc;
 159                 const struct port_info *pi = netdev_priv(dev);
 160
 161                 netif_carrier_on(dev);
 162
 163                 switch (pi->link_cfg.speed) {
 164                 case SPEED_10000:
 165                         s = "10Gbps";
 166                         break;
 167
 168                 case SPEED_1000:
 169                         s = "1000Mbps";
 170                         break;
 171
 172                 case SPEED_100:
 173                         s = "100Mbps";
 174                         break;
 175
 176                 default:
 177                         s = "unknown";
 178                         break;
 179                 }
 180
 181                 switch (pi->link_cfg.fc) {
 182                 case PAUSE_RX:
 183                         fc = "RX";
 184                         break;
 185
 186                 case PAUSE_TX:
 187                         fc = "TX";
 188                         break;
 189
 190                 case PAUSE_RX|PAUSE_TX:
 191                         fc = "RX/TX";
 192                         break;
 193
 194                 default:
 195                         fc = "no";
 196                         break;
 197                 }
 198
 199                 printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
 200                        dev->name, s, fc);
 201         } else {
 202                 netif_carrier_off(dev);
 203                 printk(KERN_INFO "%s: link down\n", dev->name);
 204         }
 205 }
 206
 207 /*
 208  * Net device operations.
 209  * ======================
 210  */
 211
 212
 213
 214
 215 /*
 216  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 217  * Interface).
 218  */
 219 static int link_start(struct net_device *dev)
 220 {
 221         int ret;
 222         struct port_info *pi = netdev_priv(dev);
 223
 224         /*
 225          * We do not set address filters and promiscuity here, the stack does
 226          * that step explicitly. Enable vlan accel.
 227          */
 228         ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
 229                               true);
 230         if (ret == 0) {
 231                 ret = t4vf_change_mac(pi->adapter, pi->viid,
 232                                       pi->xact_addr_filt, dev->dev_addr, true);
 233                 if (ret >= 0) {
 234                         pi->xact_addr_filt = ret;
 235                         ret = 0;
 236                 }
 237         }
 238
 239         /*
 240          * We don't need to actually "start the link" itself since the
 241          * firmware will do that for us when the first Virtual Interface
 242          * is enabled on a port.
 243          */
 244         if (ret == 0)
 245                 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
 246         return ret;
 247 }
 248
 249 /*
 250  * Name the MSI-X interrupts.
 251  */
 252 static void name_msix_vecs(struct adapter *adapter)
 253 {
 254         int namelen = sizeof(adapter->msix_info[0].desc) - 1;
 255         int pidx;
 256
 257         /*
 258          * Firmware events.
 259          */
 260         snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
 261                  "%s-FWeventq", adapter->name);
 262         adapter->msix_info[MSIX_FW].desc[namelen] = 0;
 263
 264         /*
 265          * Ethernet queues.
 266          */
 267         for_each_port(adapter, pidx) {
 268                 struct net_device *dev = adapter->port[pidx];
 269                 const struct port_info *pi = netdev_priv(dev);
 270                 int qs, msi;
 271
 272                 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
 273                         snprintf(adapter->msix_info[msi].desc, namelen,
 274                                  "%s-%d", dev->name, qs);
 275                         adapter->msix_info[msi].desc[namelen] = 0;
 276                 }
 277         }
 278 }
 279
 280 /*
 281  * Request all of our MSI-X resources.
 282  */
 283 static int request_msix_queue_irqs(struct adapter *adapter)
 284 {
 285         struct sge *s = &adapter->sge;
 286         int rxq, msi, err;
 287
 288         /*
 289          * Firmware events.
 290          */
 291         err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
 292                           0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
 293         if (err)
 294                 return err;
 295
 296         /*
 297          * Ethernet queues.
 298          */
 299         msi = MSIX_IQFLINT;
 300         for_each_ethrxq(s, rxq) {
 301                 err = request_irq(adapter->msix_info[msi].vec,
 302                                   t4vf_sge_intr_msix, 0,
 303                                   adapter->msix_info[msi].desc,
 304                                   &s->ethrxq[rxq].rspq);
 305                 if (err)
 306                         goto err_free_irqs;
 307                 msi++;
 308         }
 309         return 0;
 310
 311 err_free_irqs:
 312         while (--rxq >= 0)
 313                 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
 314         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 315         return err;
 316 }
 317
 318 /*
 319  * Free our MSI-X resources.
 320  */
 321 static void free_msix_queue_irqs(struct adapter *adapter)
 322 {
 323         struct sge *s = &adapter->sge;
 324         int rxq, msi;
 325
 326         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 327         msi = MSIX_IQFLINT;
 328         for_each_ethrxq(s, rxq)
 329                 free_irq(adapter->msix_info[msi++].vec,
 330                          &s->ethrxq[rxq].rspq);
 331 }
 332
 333 /*
 334  * Turn on NAPI and start up interrupts on a response queue.
 335  */
 336 static void qenable(struct sge_rspq *rspq)
 337 {
 338         napi_enable(&rspq->napi);
 339
 340         /*
 341          * 0-increment the Going To Sleep register to start the timer and
 342          * enable interrupts.
 343          */
 344         t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 345                      CIDXINC(0) |
 346                      SEINTARM(rspq->intr_params) |
 347                      INGRESSQID(rspq->cntxt_id));
 348 }
 349
 350 /*
 351  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 352  */
 353 static void enable_rx(struct adapter *adapter)
 354 {
 355         int rxq;
 356         struct sge *s = &adapter->sge;
 357
 358         for_each_ethrxq(s, rxq)
 359                 qenable(&s->ethrxq[rxq].rspq);
 360         qenable(&s->fw_evtq);
 361
 362         /*
 363          * The interrupt queue doesn't use NAPI so we do the 0-increment of
 364          * its Going To Sleep register here to get it started.
 365          */
 366         if (adapter->flags & USING_MSI)
 367                 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 368                              CIDXINC(0) |
 369                              SEINTARM(s->intrq.intr_params) |
 370                              INGRESSQID(s->intrq.cntxt_id));
 371
 372 }
 373
 374 /*
 375  * Wait until all NAPI handlers are descheduled.
 376  */
 377 static void quiesce_rx(struct adapter *adapter)
 378 {
 379         struct sge *s = &adapter->sge;
 380         int rxq;
 381
 382         for_each_ethrxq(s, rxq)
 383                 napi_disable(&s->ethrxq[rxq].rspq.napi);
 384         napi_disable(&s->fw_evtq.napi);
 385 }
 386
 387 /*
 388  * Response queue handler for the firmware event queue.
 389  */
 390 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
 391                           const struct pkt_gl *gl)
 392 {
 393         /*
 394          * Extract response opcode and get pointer to CPL message body.
 395          */
 396         struct adapter *adapter = rspq->adapter;
 397         u8 opcode = ((const struct rss_header *)rsp)->opcode;
 398         void *cpl = (void *)(rsp + 1);
 399
 400         switch (opcode) {
 401         case CPL_FW6_MSG: {
 402                 /*
 403                  * We've received an asynchronous message from the firmware.
 404                  */
 405                 const struct cpl_fw6_msg *fw_msg = cpl;
 406                 if (fw_msg->type == FW6_TYPE_CMD_RPL)
 407                         t4vf_handle_fw_rpl(adapter, fw_msg->data);
 408                 break;
 409         }
 410
 411         case CPL_SGE_EGR_UPDATE: {
 412                 /*
 413                  * We've received an Egress Queue Status Update message.  We
 414                  * get these, if the SGE is configured to send these when the
 415                  * firmware passes certain points in processing our TX
 416                  * Ethernet Queue or if we make an explicit request for one.
 417                  * We use these updates to determine when we may need to
 418                  * restart a TX Ethernet Queue which was stopped for lack of
 419                  * free TX Queue Descriptors ...
 420                  */
 421                 const struct cpl_sge_egr_update *p = (void *)cpl;
 422                 unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
 423                 struct sge *s = &adapter->sge;
 424                 struct sge_txq *tq;
 425                 struct sge_eth_txq *txq;
 426                 unsigned int eq_idx;
 427
 428                 /*
 429                  * Perform sanity checking on the Queue ID to make sure it
 430                  * really refers to one of our TX Ethernet Egress Queues which
 431                  * is active and matches the queue's ID.  None of these error
 432                  * conditions should ever happen so we may want to either make
 433                  * them fatal and/or conditionalized under DEBUG.
 434                  */
 435                 eq_idx = EQ_IDX(s, qid);
 436                 if (unlikely(eq_idx >= MAX_EGRQ)) {
 437                         dev_err(adapter->pdev_dev,
 438                                 "Egress Update QID %d out of range\n", qid);
 439                         break;
 440                 }
 441                 tq = s->egr_map[eq_idx];
 442                 if (unlikely(tq == NULL)) {
 443                         dev_err(adapter->pdev_dev,
 444                                 "Egress Update QID %d TXQ=NULL\n", qid);
 445                         break;
 446                 }
 447                 txq = container_of(tq, struct sge_eth_txq, q);
 448                 if (unlikely(tq->abs_id != qid)) {
 449                         dev_err(adapter->pdev_dev,
 450                                 "Egress Update QID %d refers to TXQ %d\n",
 451                                 qid, tq->abs_id);
 452                         break;
 453                 }
 454
 455                 /*
 456                  * Restart a stopped TX Queue which has less than half of its
 457                  * TX ring in use ...
 458                  */
 459                 txq->q.restarts++;
 460                 netif_tx_wake_queue(txq->txq);
 461                 break;
 462         }
 463
 464         default:
 465                 dev_err(adapter->pdev_dev,
 466                         "unexpected CPL %#x on FW event queue\n", opcode);
 467         }
 468
 469         return 0;
 470 }
 471
 472 /*
 473  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 474  * to use and initializes them.  We support multiple "Queue Sets" per port if
 475  * we have MSI-X, otherwise just one queue set per port.
 476  */
 477 static int setup_sge_queues(struct adapter *adapter)
 478 {
 479         struct sge *s = &adapter->sge;
 480         int err, pidx, msix;
 481
 482         /*
 483          * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
 484          * state.
 485          */
 486         bitmap_zero(s->starving_fl, MAX_EGRQ);
 487
 488         /*
 489          * If we're using MSI interrupt mode we need to set up a "forwarded
 490          * interrupt" queue which we'll set up with our MSI vector.  The rest
 491          * of the ingress queues will be set up to forward their interrupts to
 492          * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
 493          * the intrq's queue ID as the interrupt forwarding queue for the
 494          * subsequent calls ...
 495          */
 496         if (adapter->flags & USING_MSI) {
 497                 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 498                                          adapter->port[0], 0, NULL, NULL);
 499                 if (err)
 500                         goto err_free_queues;
 501         }
 502
 503         /*
 504          * Allocate our ingress queue for asynchronous firmware messages.
 505          */
 506         err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
 507                                  MSIX_FW, NULL, fwevtq_handler);
 508         if (err)
 509                 goto err_free_queues;
 510
 511         /*
 512          * Allocate each "port"'s initial Queue Sets.  These can be changed
 513          * later on ... up to the point where any interface on the adapter is
 514          * brought up at which point lots of things get nailed down
 515          * permanently ...
 516          */
 517         msix = MSIX_IQFLINT;
 518         for_each_port(adapter, pidx) {
 519                 struct net_device *dev = adapter->port[pidx];
 520                 struct port_info *pi = netdev_priv(dev);
 521                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 522                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 523                 int qs;
 524
 525                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 526                         err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
 527                                                  dev, msix++,
 528                                                  &rxq->fl, t4vf_ethrx_handler);
 529                         if (err)
 530                                 goto err_free_queues;
 531
 532                         err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
 533                                              netdev_get_tx_queue(dev, qs),
 534                                              s->fw_evtq.cntxt_id);
 535                         if (err)
 536                                 goto err_free_queues;
 537
 538                         rxq->rspq.idx = qs;
 539                         memset(&rxq->stats, 0, sizeof(rxq->stats));
 540                 }
 541         }
 542
 543         /*
 544          * Create the reverse mappings for the queues.
 545          */
 546         s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
 547         s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
 548         IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
 549         for_each_port(adapter, pidx) {
 550                 struct net_device *dev = adapter->port[pidx];
 551                 struct port_info *pi = netdev_priv(dev);
 552                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 553                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 554                 int qs;
 555
 556                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 557                         IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
 558                         EQ_MAP(s, txq->q.abs_id) = &txq->q;
 559
 560                         /*
 561                          * The FW_IQ_CMD doesn't return the Absolute Queue IDs
 562                          * for Free Lists but since all of the Egress Queues
 563                          * (including Free Lists) have Relative Queue IDs
 564                          * which are computed as Absolute - Base Queue ID, we
 565                          * can synthesize the Absolute Queue IDs for the Free
 566                          * Lists.  This is useful for debugging purposes when
 567                          * we want to dump Queue Contexts via the PF Driver.
 568                          */
 569                         rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
 570                         EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
 571                 }
 572         }
 573         return 0;
 574
 575 err_free_queues:
 576         t4vf_free_sge_resources(adapter);
 577         return err;
 578 }
 579
 580 /*
 581  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 582  * queues.  We configure the RSS CPU lookup table to distribute to the number
 583  * of HW receive queues, and the response queue lookup table to narrow that
 584  * down to the response queues actually configured for each "port" (Virtual
 585  * Interface).  We always configure the RSS mapping for all ports since the
 586  * mapping table has plenty of entries.
 587  */
 588 static int setup_rss(struct adapter *adapter)
 589 {
 590         int pidx;
 591
 592         for_each_port(adapter, pidx) {
 593                 struct port_info *pi = adap2pinfo(adapter, pidx);
 594                 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
 595                 u16 rss[MAX_PORT_QSETS];
 596                 int qs, err;
 597
 598                 for (qs = 0; qs < pi->nqsets; qs++)
 599                         rss[qs] = rxq[qs].rspq.abs_id;
 600
 601                 err = t4vf_config_rss_range(adapter, pi->viid,
 602                                             0, pi->rss_size, rss, pi->nqsets);
 603                 if (err)
 604                         return err;
 605
 606                 /*
 607                  * Perform Global RSS Mode-specific initialization.
 608                  */
 609                 switch (adapter->params.rss.mode) {
 610                 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
 611                         /*
 612                          * If Tunnel All Lookup isn't specified in the global
 613                          * RSS Configuration, then we need to specify a
 614                          * default Ingress Queue for any ingress packets which
 615                          * aren't hashed.  We'll use our first ingress queue
 616                          * ...
 617                          */
 618                         if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
 619                                 union rss_vi_config config;
 620                                 err = t4vf_read_rss_vi_config(adapter,
 621                                                               pi->viid,
 622                                                               &config);
 623                                 if (err)
 624                                         return err;
 625                                 config.basicvirtual.defaultq =
 626                                         rxq[0].rspq.abs_id;
 627                                 err = t4vf_write_rss_vi_config(adapter,
 628                                                                pi->viid,
 629                                                                &config);
 630                                 if (err)
 631                                         return err;
 632                         }
 633                         break;
 634                 }
 635         }
 636
 637         return 0;
 638 }
 639
 640 /*
 641  * Bring the adapter up.  Called whenever we go from no "ports" open to having
 642  * one open.  This function performs the actions necessary to make an adapter
 643  * operational, such as completing the initialization of HW modules, and
 644  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 645  * this is called "cxgb_up" in the PF Driver.)
 646  */
 647 static int adapter_up(struct adapter *adapter)
 648 {
 649         int err;
 650
 651         /*
 652          * If this is the first time we've been called, perform basic
 653          * adapter setup.  Once we've done this, many of our adapter
 654          * parameters can no longer be changed ...
 655          */
 656         if ((adapter->flags & FULL_INIT_DONE) == 0) {
 657                 err = setup_sge_queues(adapter);
 658                 if (err)
 659                         return err;
 660                 err = setup_rss(adapter);
 661                 if (err) {
 662                         t4vf_free_sge_resources(adapter);
 663                         return err;
 664                 }
 665
 666                 if (adapter->flags & USING_MSIX)
 667                         name_msix_vecs(adapter);
 668                 adapter->flags |= FULL_INIT_DONE;
 669         }
 670
 671         /*
 672          * Acquire our interrupt resources.  We only support MSI-X and MSI.
 673          */
 674         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
 675         if (adapter->flags & USING_MSIX)
 676                 err = request_msix_queue_irqs(adapter);
 677         else
 678                 err = request_irq(adapter->pdev->irq,
 679                                   t4vf_intr_handler(adapter), 0,
 680                                   adapter->name, adapter);
 681         if (err) {
 682                 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
 683                         err);
 684                 return err;
 685         }
 686
 687         /*
 688          * Enable NAPI ingress processing and return success.
 689          */
 690         enable_rx(adapter);
 691         t4vf_sge_start(adapter);
 692         return 0;
 693 }
 694
 695 /*
 696  * Bring the adapter down.  Called whenever the last "port" (Virtual
 697  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 698  * Driver.)
 699  */
 700 static void adapter_down(struct adapter *adapter)
 701 {
 702         /*
 703          * Free interrupt resources.
 704          */
 705         if (adapter->flags & USING_MSIX)
 706                 free_msix_queue_irqs(adapter);
 707         else
 708                 free_irq(adapter->pdev->irq, adapter);
 709
 710         /*
 711          * Wait for NAPI handlers to finish.
 712          */
 713         quiesce_rx(adapter);
 714 }
 715
 716 /*
 717  * Start up a net device.
 718  */
 719 static int cxgb4vf_open(struct net_device *dev)
 720 {
 721         int err;
 722         struct port_info *pi = netdev_priv(dev);
 723         struct adapter *adapter = pi->adapter;
 724
 725         /*
 726          * If this is the first interface that we're opening on the "adapter",
 727          * bring the "adapter" up now.
 728          */
 729         if (adapter->open_device_map == 0) {
 730                 err = adapter_up(adapter);
 731                 if (err)
 732                         return err;
 733         }
 734
 735         /*
 736          * Note that this interface is up and start everything up ...
 737          */
 738         netif_set_real_num_tx_queues(dev, pi->nqsets);
 739         err = netif_set_real_num_rx_queues(dev, pi->nqsets);
 740         if (err)
 741                 goto err_unwind;
 742         err = link_start(dev);
 743         if (err)
 744                 goto err_unwind;
 745
 746         netif_tx_start_all_queues(dev);
 747         set_bit(pi->port_id, &adapter->open_device_map);
 748         return 0;
 749
 750 err_unwind:
 751         if (adapter->open_device_map == 0)
 752                 adapter_down(adapter);
 753         return err;
 754 }
 755
 756 /*
 757  * Shut down a net device.  This routine is called "cxgb_close" in the PF
 758  * Driver ...
 759  */
 760 static int cxgb4vf_stop(struct net_device *dev)
 761 {
 762         struct port_info *pi = netdev_priv(dev);
 763         struct adapter *adapter = pi->adapter;
 764
 765         netif_tx_stop_all_queues(dev);
 766         netif_carrier_off(dev);
 767         t4vf_enable_vi(adapter, pi->viid, false, false);
 768         pi->link_cfg.link_ok = 0;
 769
 770         clear_bit(pi->port_id, &adapter->open_device_map);
 771         if (adapter->open_device_map == 0)
 772                 adapter_down(adapter);
 773         return 0;
 774 }
 775
 776 /*
 777  * Translate our basic statistics into the standard "ifconfig" statistics.
 778  */
 779 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 780 {
 781         struct t4vf_port_stats stats;
 782         struct port_info *pi = netdev2pinfo(dev);
 783         struct adapter *adapter = pi->adapter;
 784         struct net_device_stats *ns = &dev->stats;
 785         int err;
 786
 787         spin_lock(&adapter->stats_lock);
 788         err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
 789         spin_unlock(&adapter->stats_lock);
 790
 791         memset(ns, 0, sizeof(*ns));
 792         if (err)
 793                 return ns;
 794
 795         ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
 796                         stats.tx_ucast_bytes + stats.tx_offload_bytes);
 797         ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
 798                           stats.tx_ucast_frames + stats.tx_offload_frames);
 799         ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
 800                         stats.rx_ucast_bytes);
 801         ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
 802                           stats.rx_ucast_frames);
 803         ns->multicast = stats.rx_mcast_frames;
 804         ns->tx_errors = stats.tx_drop_frames;
 805         ns->rx_errors = stats.rx_err_frames;
 806
 807         return ns;
 808 }
 809
 810 /*
 811  * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
 812  * at a specified offset within the list, into an array of addrss pointers and
 813  * return the number collected.
 814  */
 815 static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
 816                                                         const u8 **addr,
 817                                                         unsigned int offset,
 818                                                         unsigned int maxaddrs)
 819 {
 820         unsigned int index = 0;
 821         unsigned int naddr = 0;
 822         const struct netdev_hw_addr *ha;
 823
 824         for_each_dev_addr(dev, ha)
 825                 if (index++ >= offset) {
 826                         addr[naddr++] = ha->addr;
 827                         if (naddr >= maxaddrs)
 828                                 break;
 829                 }
 830         return naddr;
 831 }
 832
 833 /*
 834  * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
 835  * at a specified offset within the list, into an array of addrss pointers and
 836  * return the number collected.
 837  */
 838 static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
 839                                                         const u8 **addr,
 840                                                         unsigned int offset,
 841                                                         unsigned int maxaddrs)
 842 {
 843         unsigned int index = 0;
 844         unsigned int naddr = 0;
 845         const struct netdev_hw_addr *ha;
 846
 847         netdev_for_each_mc_addr(ha, dev)
 848                 if (index++ >= offset) {
 849                         addr[naddr++] = ha->addr;
 850                         if (naddr >= maxaddrs)
 851                                 break;
 852                 }
 853         return naddr;
 854 }
 855
 856 /*
 857  * Configure the exact and hash address filters to handle a port's multicast
 858  * and secondary unicast MAC addresses.
 859  */
 860 static int set_addr_filters(const struct net_device *dev, bool sleep)
 861 {
 862         u64 mhash = 0;
 863         u64 uhash = 0;
 864         bool free = true;
 865         unsigned int offset, naddr;
 866         const u8 *addr[7];
 867         int ret;
 868         const struct port_info *pi = netdev_priv(dev);
 869
 870         /* first do the secondary unicast addresses */
 871         for (offset = 0; ; offset += naddr) {
 872                 naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
 873                                                      ARRAY_SIZE(addr));
 874                 if (naddr == 0)
 875                         break;
 876
 877                 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
 878                                           naddr, addr, NULL, &uhash, sleep);
 879                 if (ret < 0)
 880                         return ret;
 881
 882                 free = false;
 883         }
 884
 885         /* next set up the multicast addresses */
 886         for (offset = 0; ; offset += naddr) {
 887                 naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
 888                                                      ARRAY_SIZE(addr));
 889                 if (naddr == 0)
 890                         break;
 891
 892                 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
 893                                           naddr, addr, NULL, &mhash, sleep);
 894                 if (ret < 0)
 895                         return ret;
 896                 free = false;
 897         }
 898
 899         return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
 900                                   uhash | mhash, sleep);
 901 }
 902
 903 /*
 904  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
 905  * If @mtu is -1 it is left unchanged.
 906  */
 907 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 908 {
 909         int ret;
 910         struct port_info *pi = netdev_priv(dev);
 911
 912         ret = set_addr_filters(dev, sleep_ok);
 913         if (ret == 0)
 914                 ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
 915                                       (dev->flags & IFF_PROMISC) != 0,
 916                                       (dev->flags & IFF_ALLMULTI) != 0,
 917                                       1, -1, sleep_ok);
 918         return ret;
 919 }
 920
 921 /*
 922  * Set the current receive modes on the device.
 923  */
 924 static void cxgb4vf_set_rxmode(struct net_device *dev)
 925 {
 926         /* unfortunately we can't return errors to the stack */
 927         set_rxmode(dev, -1, false);
 928 }
 929
 930 /*
 931  * Find the entry in the interrupt holdoff timer value array which comes
 932  * closest to the specified interrupt holdoff value.
 933  */
 934 static int closest_timer(const struct sge *s, int us)
 935 {
 936         int i, timer_idx = 0, min_delta = INT_MAX;
 937
 938         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
 939                 int delta = us - s->timer_val[i];
 940                 if (delta < 0)
 941                         delta = -delta;
 942                 if (delta < min_delta) {
 943                         min_delta = delta;
 944                         timer_idx = i;
 945                 }
 946         }
 947         return timer_idx;
 948 }
 949
 950 static int closest_thres(const struct sge *s, int thres)
 951 {
 952         int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
 953
 954         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
 955                 delta = thres - s->counter_val[i];
 956                 if (delta < 0)
 957                         delta = -delta;
 958                 if (delta < min_delta) {
 959                         min_delta = delta;
 960                         pktcnt_idx = i;
 961                 }
 962         }
 963         return pktcnt_idx;
 964 }
 965
 966 /*
 967  * Return a queue's interrupt hold-off time in us.  0 means no timer.
 968  */
 969 static unsigned int qtimer_val(const struct adapter *adapter,
 970                                const struct sge_rspq *rspq)
 971 {
 972         unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
 973
 974         return timer_idx < SGE_NTIMERS
 975                 ? adapter->sge.timer_val[timer_idx]
 976                 : 0;
 977 }
 978
 979 /**
 980  *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
 981  *      @adapter: the adapter
 982  *      @rspq: the RX response queue
 983  *      @us: the hold-off time in us, or 0 to disable timer
 984  *      @cnt: the hold-off packet count, or 0 to disable counter
 985  *
 986  *      Sets an RX response queue's interrupt hold-off time and packet count.
 987  *      At least one of the two needs to be enabled for the queue to generate
 988  *      interrupts.
 989  */
 990 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
 991                                unsigned int us, unsigned int cnt)
 992 {
 993         unsigned int timer_idx;
 994
 995         /*
 996          * If both the interrupt holdoff timer and count are specified as
 997          * zero, default to a holdoff count of 1 ...
 998          */
 999         if ((us | cnt) == 0)
1000                 cnt = 1;
1001
1002         /*
1003          * If an interrupt holdoff count has been specified, then find the
1004          * closest configured holdoff count and use that.  If the response
1005          * queue has already been created, then update its queue context
1006          * parameters ...
1007          */
1008         if (cnt) {
1009                 int err;
1010                 u32 v, pktcnt_idx;
1011
1012                 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1013                 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1014                         v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1015                             FW_PARAMS_PARAM_X(
1016                                         FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1017                             FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1018                         err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1019                         if (err)
1020                                 return err;
1021                 }
1022                 rspq->pktcnt_idx = pktcnt_idx;
1023         }
1024
1025         /*
1026          * Compute the closest holdoff timer index from the supplied holdoff
1027          * timer value.
1028          */
1029         timer_idx = (us == 0
1030                      ? SGE_TIMER_RSTRT_CNTR
1031                      : closest_timer(&adapter->sge, us));
1032
1033         /*
1034          * Update the response queue's interrupt coalescing parameters and
1035          * return success.
1036          */
1037         rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1038                              (cnt > 0 ? QINTR_CNT_EN : 0));
1039         return 0;
1040 }
1041
/*
 * Build the version number that identifies the type of adapter:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 * The value is fixed: chip version 4, revision 0x3f (cxgb4vf).
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
        const unsigned int chip_version = 4;
        const unsigned int chip_revision = 0x3f;

        return chip_version | (chip_revision << 10);
}
1054
/*
 * Execute the specified ioctl command.
 *
 * The VF Driver doesn't have access to any of the other common Ethernet
 * device ioctl()'s (like reading/writing PHY registers, etc.), so every
 * command is rejected with -EOPNOTSUPP.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
        return -EOPNOTSUPP;
}
1075
1076 /*
1077  * Change the device's MTU.
1078  */
1079 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1080 {
1081         int ret;
1082         struct port_info *pi = netdev_priv(dev);
1083
1084         /* accommodate SACK */
1085         if (new_mtu < 81)
1086                 return -EINVAL;
1087
1088         ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1089                               -1, -1, -1, -1, true);
1090         if (!ret)
1091                 dev->mtu = new_mtu;
1092         return ret;
1093 }
1094
1095 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1096         netdev_features_t features)
1097 {
1098         /*
1099          * Since there is no support for separate rx/tx vlan accel
1100          * enable/disable make sure tx flag is always in same state as rx.
1101          */
1102         if (features & NETIF_F_HW_VLAN_RX)
1103                 features |= NETIF_F_HW_VLAN_TX;
1104         else
1105                 features &= ~NETIF_F_HW_VLAN_TX;
1106
1107         return features;
1108 }
1109
1110 static int cxgb4vf_set_features(struct net_device *dev,
1111         netdev_features_t features)
1112 {
1113         struct port_info *pi = netdev_priv(dev);
1114         netdev_features_t changed = dev->features ^ features;
1115
1116         if (changed & NETIF_F_HW_VLAN_RX)
1117                 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1118                                 features & NETIF_F_HW_VLAN_TX, 0);
1119
1120         return 0;
1121 }
1122
1123 /*
1124  * Change the devices MAC address.
1125  */
1126 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1127 {
1128         int ret;
1129         struct sockaddr *addr = _addr;
1130         struct port_info *pi = netdev_priv(dev);
1131
1132         if (!is_valid_ether_addr(addr->sa_data))
1133                 return -EINVAL;
1134
1135         ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1136                               addr->sa_data, true);
1137         if (ret < 0)
1138                 return ret;
1139
1140         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1141         pi->xact_addr_filt = ret;
1142         return 0;
1143 }
1144
1145 #ifdef CONFIG_NET_POLL_CONTROLLER
1146 /*
1147  * Poll all of our receive queues.  This is called outside of normal interrupt
1148  * context.
1149  */
1150 static void cxgb4vf_poll_controller(struct net_device *dev)
1151 {
1152         struct port_info *pi = netdev_priv(dev);
1153         struct adapter *adapter = pi->adapter;
1154
1155         if (adapter->flags & USING_MSIX) {
1156                 struct sge_eth_rxq *rxq;
1157                 int nqsets;
1158
1159                 rxq = &adapter->sge.ethrxq[pi->first_qset];
1160                 for (nqsets = pi->nqsets; nqsets; nqsets--) {
1161                         t4vf_sge_intr_msix(0, &rxq->rspq);
1162                         rxq++;
1163                 }
1164         } else
1165                 t4vf_intr_handler(adapter)(0, adapter);
1166 }
1167 #endif
1168
1169 /*
1170  * Ethtool operations.
1171  * ===================
1172  *
1173  * Note that we don't support any ethtool operations which change the physical
1174  * state of the port to which we're linked.
1175  */
1176
1177 /*
1178  * Return current port link settings.
1179  */
1180 static int cxgb4vf_get_settings(struct net_device *dev,
1181                                 struct ethtool_cmd *cmd)
1182 {
1183         const struct port_info *pi = netdev_priv(dev);
1184
1185         cmd->supported = pi->link_cfg.supported;
1186         cmd->advertising = pi->link_cfg.advertising;
1187         ethtool_cmd_speed_set(cmd,
1188                               netif_carrier_ok(dev) ? pi->link_cfg.speed : -1);
1189         cmd->duplex = DUPLEX_FULL;
1190
1191         cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1192         cmd->phy_address = pi->port_id;
1193         cmd->transceiver = XCVR_EXTERNAL;
1194         cmd->autoneg = pi->link_cfg.autoneg;
1195         cmd->maxtxpkt = 0;
1196         cmd->maxrxpkt = 0;
1197         return 0;
1198 }
1199
1200 /*
1201  * Return our driver information.
1202  */
1203 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1204                                 struct ethtool_drvinfo *drvinfo)
1205 {
1206         struct adapter *adapter = netdev2adap(dev);
1207
1208         strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1209         strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1210         strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1211                 sizeof(drvinfo->bus_info));
1212         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1213                  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1214                  FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1215                  FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1216                  FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1217                  FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1218                  FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1219                  FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1220                  FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1221                  FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1222 }
1223
1224 /*
1225  * Return current adapter message level.
1226  */
1227 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1228 {
1229         return netdev2adap(dev)->msg_enable;
1230 }
1231
1232 /*
1233  * Set current adapter message level.
1234  */
1235 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1236 {
1237         netdev2adap(dev)->msg_enable = msglevel;
1238 }
1239
1240 /*
1241  * Return the device's current Queue Set ring size parameters along with the
1242  * allowed maximum values.  Since ethtool doesn't understand the concept of
1243  * multi-queue devices, we just return the current values associated with the
1244  * first Queue Set.
1245  */
1246 static void cxgb4vf_get_ringparam(struct net_device *dev,
1247                                   struct ethtool_ringparam *rp)
1248 {
1249         const struct port_info *pi = netdev_priv(dev);
1250         const struct sge *s = &pi->adapter->sge;
1251
1252         rp->rx_max_pending = MAX_RX_BUFFERS;
1253         rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1254         rp->rx_jumbo_max_pending = 0;
1255         rp->tx_max_pending = MAX_TXQ_ENTRIES;
1256
1257         rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1258         rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1259         rp->rx_jumbo_pending = 0;
1260         rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1261 }
1262
1263 /*
1264  * Set the Queue Set ring size parameters for the device.  Again, since
1265  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1266  * apply these new values across all of the Queue Sets associated with the
1267  * device -- after vetting them of course!
1268  */
1269 static int cxgb4vf_set_ringparam(struct net_device *dev,
1270                                  struct ethtool_ringparam *rp)
1271 {
1272         const struct port_info *pi = netdev_priv(dev);
1273         struct adapter *adapter = pi->adapter;
1274         struct sge *s = &adapter->sge;
1275         int qs;
1276
1277         if (rp->rx_pending > MAX_RX_BUFFERS ||
1278             rp->rx_jumbo_pending ||
1279             rp->tx_pending > MAX_TXQ_ENTRIES ||
1280             rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1281             rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1282             rp->rx_pending < MIN_FL_ENTRIES ||
1283             rp->tx_pending < MIN_TXQ_ENTRIES)
1284                 return -EINVAL;
1285
1286         if (adapter->flags & FULL_INIT_DONE)
1287                 return -EBUSY;
1288
1289         for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1290                 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1291                 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1292                 s->ethtxq[qs].q.size = rp->tx_pending;
1293         }
1294         return 0;
1295 }
1296
1297 /*
1298  * Return the interrupt holdoff timer and count for the first Queue Set on the
1299  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1300  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1301  */
1302 static int cxgb4vf_get_coalesce(struct net_device *dev,
1303                                 struct ethtool_coalesce *coalesce)
1304 {
1305         const struct port_info *pi = netdev_priv(dev);
1306         const struct adapter *adapter = pi->adapter;
1307         const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1308
1309         coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1310         coalesce->rx_max_coalesced_frames =
1311                 ((rspq->intr_params & QINTR_CNT_EN)
1312                  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1313                  : 0);
1314         return 0;
1315 }
1316
1317 /*
1318  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1319  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1320  * the interrupt holdoff timer on any of the device's Queue Sets.
1321  */
1322 static int cxgb4vf_set_coalesce(struct net_device *dev,
1323                                 struct ethtool_coalesce *coalesce)
1324 {
1325         const struct port_info *pi = netdev_priv(dev);
1326         struct adapter *adapter = pi->adapter;
1327
1328         return set_rxq_intr_params(adapter,
1329                                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1330                                    coalesce->rx_coalesce_usecs,
1331                                    coalesce->rx_max_coalesced_frames);
1332 }
1333
1334 /*
1335  * Report current port link pause parameter settings.
1336  */
1337 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1338                                    struct ethtool_pauseparam *pauseparam)
1339 {
1340         struct port_info *pi = netdev_priv(dev);
1341
1342         pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1343         pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1344         pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1345 }
1346
1347 /*
1348  * Identify the port by blinking the port's LED.
1349  */
1350 static int cxgb4vf_phys_id(struct net_device *dev,
1351                            enum ethtool_phys_id_state state)
1352 {
1353         unsigned int val;
1354         struct port_info *pi = netdev_priv(dev);
1355
1356         if (state == ETHTOOL_ID_ACTIVE)
1357                 val = 0xffff;
1358         else if (state == ETHTOOL_ID_INACTIVE)
1359                 val = 0;
1360         else
1361                 return -EINVAL;
1362
1363         return t4vf_identify_port(pi->adapter, pi->viid, val);
1364 }
1365
/*
 * Port stats maintained per queue of the port.  collect_sge_port_stats()
 * sums each field over all of a port's Queue Sets, and
 * cxgb4vf_get_ethtool_stats() overlays this structure on the tail of the
 * u64 statistics array, so the field order must stay in step with the
 * per-queue entries of stats_strings[].
 */
struct queue_port_stats {
        u64 tso;                /* sum of the TX queues' tso counters */
        u64 tx_csum;            /* sum of the TX queues' tx_cso counters */
        u64 rx_csum;            /* sum of the RX queues' rx_cso counters */
        u64 vlan_ex;            /* sum of the RX queues' vlan_ex counters */
        u64 vlan_ins;           /* sum of the TX queues' vlan_ins counters */
        u64 lro_pkts;           /* sum of the RX queues' lro_pkts counters */
        u64 lro_merged;         /* sum of the RX queues' lro_merged counters */
};
1378
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 *
 * cxgb4vf_get_ethtool_stats() fills the u64 value array by writing a
 * struct t4vf_port_stats followed immediately by a struct queue_port_stats,
 * so the names below must appear in exactly that order, one per u64.
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
        /*
         * These must match the layout of the t4vf_port_stats structure.
         */
        "TxBroadcastBytes  ",
        "TxBroadcastFrames ",
        "TxMulticastBytes  ",
        "TxMulticastFrames ",
        "TxUnicastBytes    ",
        "TxUnicastFrames   ",
        "TxDroppedFrames   ",
        "TxOffloadBytes    ",
        "TxOffloadFrames   ",
        "RxBroadcastBytes  ",
        "RxBroadcastFrames ",
        "RxMulticastBytes  ",
        "RxMulticastFrames ",
        "RxUnicastBytes    ",
        "RxUnicastFrames   ",
        "RxErrorFrames     ",

        /*
         * These are accumulated per-queue statistics and must match the
         * order of the fields in the queue_port_stats structure.
         */
        "TSO               ",
        "TxCsumOffload     ",
        "RxCsumGood        ",
        "VLANextractions   ",
        "VLANinsertions    ",
        "GROPackets        ",
        "GROMerged         ",
};
1417
1418 /*
1419  * Return the number of statistics in the specified statistics set.
1420  */
1421 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1422 {
1423         switch (sset) {
1424         case ETH_SS_STATS:
1425                 return ARRAY_SIZE(stats_strings);
1426         default:
1427                 return -EOPNOTSUPP;
1428         }
1429         /*NOTREACHED*/
1430 }
1431
1432 /*
1433  * Return the strings for the specified statistics set.
1434  */
1435 static void cxgb4vf_get_strings(struct net_device *dev,
1436                                 u32 sset,
1437                                 u8 *data)
1438 {
1439         switch (sset) {
1440         case ETH_SS_STATS:
1441                 memcpy(data, stats_strings, sizeof(stats_strings));
1442                 break;
1443         }
1444 }
1445
1446 /*
1447  * Small utility routine to accumulate queue statistics across the queues of
1448  * a "port".
1449  */
1450 static void collect_sge_port_stats(const struct adapter *adapter,
1451                                    const struct port_info *pi,
1452                                    struct queue_port_stats *stats)
1453 {
1454         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1455         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1456         int qs;
1457
1458         memset(stats, 0, sizeof(*stats));
1459         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1460                 stats->tso += txq->tso;
1461                 stats->tx_csum += txq->tx_cso;
1462                 stats->rx_csum += rxq->stats.rx_cso;
1463                 stats->vlan_ex += rxq->stats.vlan_ex;
1464                 stats->vlan_ins += txq->vlan_ins;
1465                 stats->lro_pkts += rxq->stats.lro_pkts;
1466                 stats->lro_merged += rxq->stats.lro_merged;
1467         }
1468 }
1469
1470 /*
1471  * Return the ETH_SS_STATS statistics set.
1472  */
1473 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1474                                       struct ethtool_stats *stats,
1475                                       u64 *data)
1476 {
1477         struct port_info *pi = netdev2pinfo(dev);
1478         struct adapter *adapter = pi->adapter;
1479         int err = t4vf_get_port_stats(adapter, pi->pidx,
1480                                       (struct t4vf_port_stats *)data);
1481         if (err)
1482                 memset(data, 0, sizeof(struct t4vf_port_stats));
1483
1484         data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1485         collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1486 }
1487
1488 /*
1489  * Return the size of our register map.
1490  */
1491 static int cxgb4vf_get_regs_len(struct net_device *dev)
1492 {
1493         return T4VF_REGMAP_SIZE;
1494 }
1495
1496 /*
1497  * Dump a block of registers, start to end inclusive, into a buffer.
1498  */
1499 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1500                            unsigned int start, unsigned int end)
1501 {
1502         u32 *bp = regbuf + start - T4VF_REGMAP_START;
1503
1504         for ( ; start <= end; start += sizeof(u32)) {
1505                 /*
1506                  * Avoid reading the Mailbox Control register since that
1507                  * can trigger a Mailbox Ownership Arbitration cycle and
1508                  * interfere with communication with the firmware.
1509                  */
1510                 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1511                         *bp++ = 0xffff;
1512                 else
1513                         *bp++ = t4_read_reg(adapter, start);
1514         }
1515 }
1516
1517 /*
1518  * Copy our entire register map into the provided buffer.
1519  */
1520 static void cxgb4vf_get_regs(struct net_device *dev,
1521                              struct ethtool_regs *regs,
1522                              void *regbuf)
1523 {
1524         struct adapter *adapter = netdev2adap(dev);
1525
1526         regs->version = mk_adap_vers(adapter);
1527
1528         /*
1529          * Fill in register buffer with our register map.
1530          */
1531         memset(regbuf, 0, T4VF_REGMAP_SIZE);
1532
1533         reg_block_dump(adapter, regbuf,
1534                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1535                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1536         reg_block_dump(adapter, regbuf,
1537                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1538                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1539         reg_block_dump(adapter, regbuf,
1540                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1541                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1542         reg_block_dump(adapter, regbuf,
1543                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1544                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1545
1546         reg_block_dump(adapter, regbuf,
1547                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1548                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1549 }
1550
1551 /*
1552  * Report current Wake On LAN settings.
1553  */
1554 static void cxgb4vf_get_wol(struct net_device *dev,
1555                             struct ethtool_wolinfo *wol)
1556 {
1557         wol->supported = 0;
1558         wol->wolopts = 0;
1559         memset(&wol->sopass, 0, sizeof(wol->sopass));
1560 }
1561
/*
 * TCP Segmentation Offload flags which we support: the union of the
 * NETIF_F_TSO, NETIF_F_TSO6 and NETIF_F_TSO_ECN feature bits.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1566
1567 static struct ethtool_ops cxgb4vf_ethtool_ops = {
1568         .get_settings           = cxgb4vf_get_settings,
1569         .get_drvinfo            = cxgb4vf_get_drvinfo,
1570         .get_msglevel           = cxgb4vf_get_msglevel,
1571         .set_msglevel           = cxgb4vf_set_msglevel,
1572         .get_ringparam          = cxgb4vf_get_ringparam,
1573         .set_ringparam          = cxgb4vf_set_ringparam,
1574         .get_coalesce           = cxgb4vf_get_coalesce,
1575         .set_coalesce           = cxgb4vf_set_coalesce,
1576         .get_pauseparam         = cxgb4vf_get_pauseparam,
1577         .get_link               = ethtool_op_get_link,
1578         .get_strings            = cxgb4vf_get_strings,
1579         .set_phys_id            = cxgb4vf_phys_id,
1580         .get_sset_count         = cxgb4vf_get_sset_count,
1581         .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1582         .get_regs_len           = cxgb4vf_get_regs_len,
1583         .get_regs               = cxgb4vf_get_regs,
1584         .get_wol                = cxgb4vf_get_wol,
1585 };
1586
1587 /*
1588  * /sys/kernel/debug/cxgb4vf support code and data.
1589  * ================================================
1590  */
1591
/*
 * Show SGE Queue Set information.  We display QPL Queues Sets per line.
 *
 * Each call prints one "entry" of the file.  The entry number is recovered
 * from @v as (uintptr_t)v - 1.  Entries 0 .. eth_entries-1 each describe up
 * to QPL Ethernet Queue Sets side by side; the entry after those describes
 * the firmware event queue and the one after that the interrupt queue.
 * Always returns 0.
 */
#define QPL     4

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
        struct adapter *adapter = seq->private;
        /* number of entries needed to show all Ethernet Queue Sets */
        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
        int qs, r = (uintptr_t)v - 1;

        /* separate every entry but the first from its predecessor */
        if (r)
                seq_putc(seq, '\n');

        /*
         * Row helpers.  S3 prints a left-justified 12 column label followed
         * by one 16 column wide value per Queue Set; it relies on the
         * variables "qs" and "n" of the enclosing scope, and the value
         * expression is re-evaluated for every qs.  S prints strings, T and
         * R print unsigned fields of txq[qs] and rxq[qs] respectively.
         */
        #define S3(fmt_spec, s, v) \
                do {\
                        seq_printf(seq, "%-12s", s); \
                        for (qs = 0; qs < n; ++qs) \
                                seq_printf(seq, " %16" fmt_spec, v); \
                        seq_putc(seq, '\n'); \
                } while (0)
        #define S(s, v)         S3("s", s, v)
        #define T(s, v)         S3("u", s, txq[qs].v)
        #define R(s, v)         S3("u", s, rxq[qs].v)

        if (r < eth_entries) {
                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
                /* the last entry may hold fewer than QPL Queue Sets */
                int n = min(QPL, adapter->sge.ethqsets - QPL * r);

                S("QType:", "Ethernet");
                /* queues without a netdevice are shown as "N/A" / port -1 */
                S("Interface:",
                  (rxq[qs].rspq.netdev
                   ? rxq[qs].rspq.netdev->name
                   : "N/A"));
                S3("d", "Port:",
                   (rxq[qs].rspq.netdev
                    ? ((struct port_info *)
                       netdev_priv(rxq[qs].rspq.netdev))->port_id
                    : -1));
                T("TxQ ID:", q.abs_id);
                T("TxQ size:", q.size);
                T("TxQ inuse:", q.in_use);
                T("TxQ PIdx:", q.pidx);
                T("TxQ CIdx:", q.cidx);
                R("RspQ ID:", rspq.abs_id);
                R("RspQ size:", rspq.size);
                R("RspQE size:", rspq.iqe_len);
                S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
                S3("u", "Intr pktcnt:",
                   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
                R("RspQ CIdx:", rspq.cidx);
                R("RspQ Gen:", rspq.gen);
                R("FL ID:", fl.abs_id);
                /* free list size is shown without its MIN_FL_RESID reserve */
                R("FL size:", fl.size - MIN_FL_RESID);
                R("FL avail:", fl.avail);
                R("FL PIdx:", fl.pidx);
                R("FL CIdx:", fl.cidx);
                return 0;
        }

        /* past the Ethernet entries: renumber the remaining ones from 0 */
        r -= eth_entries;
        if (r == 0) {
                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

                seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adapter, evtq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
                           adapter->sge.counter_val[evtq->pktcnt_idx]);
                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
        } else if (r == 1) {
                const struct sge_rspq *intrq = &adapter->sge.intrq;

                seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
                seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
                seq_printf(seq, "%-12s %16u\n", "Intr delay:",
                           qtimer_val(adapter, intrq));
                seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
                           adapter->sge.counter_val[intrq->pktcnt_idx]);
                seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
                seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
        }

        /* the row helpers are local to this function */
        #undef R
        #undef T
        #undef S
        #undef S3

        return 0;
}
1685
1686 /*
1687  * Return the number of "entries" in our "file".  We group the multi-Queue
1688  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1689  *
1690  *     Ethernet RX/TX Queue Sets
1691  *     Firmware Event Queue
1692  *     Forwarded Interrupt Queue (if in MSI mode)
1693  */
1694 static int sge_queue_entries(const struct adapter *adapter)
1695 {
1696         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1697                 ((adapter->flags & USING_MSI) != 0);
1698 }
1699
1700 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1701 {
1702         int entries = sge_queue_entries(seq->private);
1703
1704         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1705 }
1706
/*
 * seq_file ->stop(): sge_queue_start()/sge_queue_next() only hand out integer
 * tokens, so there is nothing to release here.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
        /* intentionally empty */
}
1710
1711 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1712 {
1713         int entries = sge_queue_entries(seq->private);
1714
1715         ++*pos;
1716         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1717 }
1718
1719 static const struct seq_operations sge_qinfo_seq_ops = {
1720         .start = sge_queue_start,
1721         .next  = sge_queue_next,
1722         .stop  = sge_queue_stop,
1723         .show  = sge_qinfo_show
1724 };
1725
1726 static int sge_qinfo_open(struct inode *inode, struct file *file)
1727 {
1728         int res = seq_open(file, &sge_qinfo_seq_ops);
1729
1730         if (!res) {
1731                 struct seq_file *seq = file->private_data;
1732                 seq->private = inode->i_private;
1733         }
1734         return res;
1735 }
1736
1737 static const struct file_operations sge_qinfo_debugfs_fops = {
1738         .owner   = THIS_MODULE,
1739         .open    = sge_qinfo_open,
1740         .read    = seq_read,
1741         .llseek  = seq_lseek,
1742         .release = seq_release,
1743 };
1744
1745 /*
1746  * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
1747  */
1748 #define QPL     4
1749
1750 static int sge_qstats_show(struct seq_file *seq, void *v)
1751 {
1752         struct adapter *adapter = seq->private;
1753         int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1754         int qs, r = (uintptr_t)v - 1;
1755
1756         if (r)
1757                 seq_putc(seq, '\n');
1758
1759         #define S3(fmt, s, v) \
1760                 do { \
1761                         seq_printf(seq, "%-16s", s); \
1762                         for (qs = 0; qs < n; ++qs) \
1763                                 seq_printf(seq, " %8" fmt, v); \
1764                         seq_putc(seq, '\n'); \
1765                 } while (0)
1766         #define S(s, v)         S3("s", s, v)
1767
1768         #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
1769         #define T(s, v)         T3("lu", s, v)
1770
1771         #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
1772         #define R(s, v)         R3("lu", s, v)
1773
1774         if (r < eth_entries) {
1775                 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1776                 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1777                 int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1778
1779                 S("QType:", "Ethernet");
1780                 S("Interface:",
1781                   (rxq[qs].rspq.netdev
1782                    ? rxq[qs].rspq.netdev->name
1783                    : "N/A"));
1784                 R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1785                 R("RxPackets:", stats.pkts);
1786                 R("RxCSO:", stats.rx_cso);
1787                 R("VLANxtract:", stats.vlan_ex);
1788                 R("LROmerged:", stats.lro_merged);
1789                 R("LROpackets:", stats.lro_pkts);
1790                 R("RxDrops:", stats.rx_drops);
1791                 T("TSO:", tso);
1792                 T("TxCSO:", tx_cso);
1793                 T("VLANins:", vlan_ins);
1794                 T("TxQFull:", q.stops);
1795                 T("TxQRestarts:", q.restarts);
1796                 T("TxMapErr:", mapping_err);
1797                 R("FLAllocErr:", fl.alloc_failed);
1798                 R("FLLrgAlcErr:", fl.large_alloc_failed);
1799                 R("FLStarving:", fl.starving);
1800                 return 0;
1801         }
1802
1803         r -= eth_entries;
1804         if (r == 0) {
1805                 const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1806
1807                 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1808                 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1809                            evtq->unhandled_irqs);
1810                 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1811                 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1812         } else if (r == 1) {
1813                 const struct sge_rspq *intrq = &adapter->sge.intrq;
1814
1815                 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1816                 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1817                            intrq->unhandled_irqs);
1818                 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1819                 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1820         }
1821
1822         #undef R
1823         #undef T
1824         #undef S
1825         #undef R3
1826         #undef T3
1827         #undef S3
1828
1829         return 0;
1830 }
1831
1832 /*
1833  * Return the number of "entries" in our "file".  We group the multi-Queue
1834  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1835  *
1836  *     Ethernet RX/TX Queue Sets
1837  *     Firmware Event Queue
1838  *     Forwarded Interrupt Queue (if in MSI mode)
1839  */
1840 static int sge_qstats_entries(const struct adapter *adapter)
1841 {
1842         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1843                 ((adapter->flags & USING_MSI) != 0);
1844 }
1845
1846 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1847 {
1848         int entries = sge_qstats_entries(seq->private);
1849
1850         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1851 }
1852
/*
 * seq_file ->stop(): the iterator tokens are plain integers, so no state
 * needs to be torn down.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
        /* intentionally empty */
}
1856
1857 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1858 {
1859         int entries = sge_qstats_entries(seq->private);
1860
1861         (*pos)++;
1862         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1863 }
1864
1865 static const struct seq_operations sge_qstats_seq_ops = {
1866         .start = sge_qstats_start,
1867         .next  = sge_qstats_next,
1868         .stop  = sge_qstats_stop,
1869         .show  = sge_qstats_show
1870 };
1871
1872 static int sge_qstats_open(struct inode *inode, struct file *file)
1873 {
1874         int res = seq_open(file, &sge_qstats_seq_ops);
1875
1876         if (res == 0) {
1877                 struct seq_file *seq = file->private_data;
1878                 seq->private = inode->i_private;
1879         }
1880         return res;
1881 }
1882
1883 static const struct file_operations sge_qstats_proc_fops = {
1884         .owner   = THIS_MODULE,
1885         .open    = sge_qstats_open,
1886         .read    = seq_read,
1887         .llseek  = seq_lseek,
1888         .release = seq_release,
1889 };
1890
1891 /*
1892  * Show PCI-E SR-IOV Virtual Function Resource Limits.
1893  */
1894 static int resources_show(struct seq_file *seq, void *v)
1895 {
1896         struct adapter *adapter = seq->private;
1897         struct vf_resources *vfres = &adapter->params.vfres;
1898
1899         #define S(desc, fmt, var) \
1900                 seq_printf(seq, "%-60s " fmt "\n", \
1901                            desc " (" #var "):", vfres->var)
1902
1903         S("Virtual Interfaces", "%d", nvi);
1904         S("Egress Queues", "%d", neq);
1905         S("Ethernet Control", "%d", nethctrl);
1906         S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1907         S("Ingress Queues", "%d", niq);
1908         S("Traffic Class", "%d", tc);
1909         S("Port Access Rights Mask", "%#x", pmask);
1910         S("MAC Address Filters", "%d", nexactf);
1911         S("Firmware Command Read Capabilities", "%#x", r_caps);
1912         S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1913
1914         #undef S
1915
1916         return 0;
1917 }
1918
1919 static int resources_open(struct inode *inode, struct file *file)
1920 {
1921         return single_open(file, resources_show, inode->i_private);
1922 }
1923
1924 static const struct file_operations resources_proc_fops = {
1925         .owner   = THIS_MODULE,
1926         .open    = resources_open,
1927         .read    = seq_read,
1928         .llseek  = seq_lseek,
1929         .release = single_release,
1930 };
1931
1932 /*
1933  * Show Virtual Interfaces.
1934  */
1935 static int interfaces_show(struct seq_file *seq, void *v)
1936 {
1937         if (v == SEQ_START_TOKEN) {
1938                 seq_puts(seq, "Interface  Port   VIID\n");
1939         } else {
1940                 struct adapter *adapter = seq->private;
1941                 int pidx = (uintptr_t)v - 2;
1942                 struct net_device *dev = adapter->port[pidx];
1943                 struct port_info *pi = netdev_priv(dev);
1944
1945                 seq_printf(seq, "%9s  %4d  %#5x\n",
1946                            dev->name, pi->port_id, pi->viid);
1947         }
1948         return 0;
1949 }
1950
1951 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1952 {
1953         return pos <= adapter->params.nports
1954                 ? (void *)(uintptr_t)(pos + 1)
1955                 : NULL;
1956 }
1957
1958 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1959 {
1960         return *pos
1961                 ? interfaces_get_idx(seq->private, *pos)
1962                 : SEQ_START_TOKEN;
1963 }
1964
1965 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1966 {
1967         (*pos)++;
1968         return interfaces_get_idx(seq->private, *pos);
1969 }
1970
/*
 * seq_file ->stop(): nothing was acquired in interfaces_start(), so nothing
 * needs to be released.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
        /* intentionally empty */
}
1974
1975 static const struct seq_operations interfaces_seq_ops = {
1976         .start = interfaces_start,
1977         .next  = interfaces_next,
1978         .stop  = interfaces_stop,
1979         .show  = interfaces_show
1980 };
1981
1982 static int interfaces_open(struct inode *inode, struct file *file)
1983 {
1984         int res = seq_open(file, &interfaces_seq_ops);
1985
1986         if (res == 0) {
1987                 struct seq_file *seq = file->private_data;
1988                 seq->private = inode->i_private;
1989         }
1990         return res;
1991 }
1992
1993 static const struct file_operations interfaces_proc_fops = {
1994         .owner   = THIS_MODULE,
1995         .open    = interfaces_open,
1996         .read    = seq_read,
1997         .llseek  = seq_lseek,
1998         .release = seq_release,
1999 };
2000
2001 /*
2002  * /sys/kernel/debugfs/cxgb4vf/ files list.
2003  */
2004 struct cxgb4vf_debugfs_entry {
2005         const char *name;               /* name of debugfs node */
2006         mode_t mode;                    /* file system mode */
2007         const struct file_operations *fops;
2008 };
2009
2010 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2011         { "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2012         { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2013         { "resources",  S_IRUGO, &resources_proc_fops },
2014         { "interfaces", S_IRUGO, &interfaces_proc_fops },
2015 };
2016
2017 /*
2018  * Module and device initialization and cleanup code.
2019  * ==================================================
2020  */
2021
2022 /*
2023  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2024  * directory (debugfs_root) has already been set up.
2025  */
2026 static int __devinit setup_debugfs(struct adapter *adapter)
2027 {
2028         int i;
2029
2030         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2031
2032         /*
2033          * Debugfs support is best effort.
2034          */
2035         for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2036                 (void)debugfs_create_file(debugfs_files[i].name,
2037                                   debugfs_files[i].mode,
2038                                   adapter->debugfs_root,
2039                                   (void *)adapter,
2040                                   debugfs_files[i].fops);
2041
2042         return 0;
2043 }
2044
2045 /*
2046  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2047  * it to our caller to tear down the directory (debugfs_root).
2048  */
2049 static void cleanup_debugfs(struct adapter *adapter)
2050 {
2051         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2052
2053         /*
2054          * Unlike our sister routine cleanup_proc(), we don't need to remove
2055          * individual entries because a call will be made to
2056          * debugfs_remove_recursive().  We just need to clean up any ancillary
2057          * persistent state.
2058          */
2059         /* nothing to do */
2060 }
2061
2062 /*
2063  * Perform early "adapter" initialization.  This is where we discover what
2064  * adapter parameters we're going to be using and initialize basic adapter
2065  * hardware support.
2066  */
2067 static int __devinit adap_init0(struct adapter *adapter)
2068 {
2069         struct vf_resources *vfres = &adapter->params.vfres;
2070         struct sge_params *sge_params = &adapter->params.sge;
2071         struct sge *s = &adapter->sge;
2072         unsigned int ethqsets;
2073         int err;
2074
2075         /*
2076          * Wait for the device to become ready before proceeding ...
2077          */
2078         err = t4vf_wait_dev_ready(adapter);
2079         if (err) {
2080                 dev_err(adapter->pdev_dev, "device didn't become ready:"
2081                         " err=%d\n", err);
2082                 return err;
2083         }
2084
2085         /*
2086          * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2087          * 2.6.31 and later we can't call pci_reset_function() in order to
2088          * issue an FLR because of a self- deadlock on the device semaphore.
2089          * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2090          * cases where they're needed -- for instance, some versions of KVM
2091          * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2092          * use the firmware based reset in order to reset any per function
2093          * state.
2094          */
2095         err = t4vf_fw_reset(adapter);
2096         if (err < 0) {
2097                 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2098                 return err;
2099         }
2100
2101         /*
2102          * Grab basic operational parameters.  These will predominantly have
2103          * been set up by the Physical Function Driver or will be hard coded
2104          * into the adapter.  We just have to live with them ...  Note that
2105          * we _must_ get our VPD parameters before our SGE parameters because
2106          * we need to know the adapter's core clock from the VPD in order to
2107          * properly decode the SGE Timer Values.
2108          */
2109         err = t4vf_get_dev_params(adapter);
2110         if (err) {
2111                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2112                         " device parameters: err=%d\n", err);
2113                 return err;
2114         }
2115         err = t4vf_get_vpd_params(adapter);
2116         if (err) {
2117                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2118                         " VPD parameters: err=%d\n", err);
2119                 return err;
2120         }
2121         err = t4vf_get_sge_params(adapter);
2122         if (err) {
2123                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2124                         " SGE parameters: err=%d\n", err);
2125                 return err;
2126         }
2127         err = t4vf_get_rss_glb_config(adapter);
2128         if (err) {
2129                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2130                         " RSS parameters: err=%d\n", err);
2131                 return err;
2132         }
2133         if (adapter->params.rss.mode !=
2134             FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2135                 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2136                         " mode %d\n", adapter->params.rss.mode);
2137                 return -EINVAL;
2138         }
2139         err = t4vf_sge_init(adapter);
2140         if (err) {
2141                 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2142                         " err=%d\n", err);
2143                 return err;
2144         }
2145
2146         /*
2147          * Retrieve our RX interrupt holdoff timer values and counter
2148          * threshold values from the SGE parameters.
2149          */
2150         s->timer_val[0] = core_ticks_to_us(adapter,
2151                 TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2152         s->timer_val[1] = core_ticks_to_us(adapter,
2153                 TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2154         s->timer_val[2] = core_ticks_to_us(adapter,
2155                 TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2156         s->timer_val[3] = core_ticks_to_us(adapter,
2157                 TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2158         s->timer_val[4] = core_ticks_to_us(adapter,
2159                 TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2160         s->timer_val[5] = core_ticks_to_us(adapter,
2161                 TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2162
2163         s->counter_val[0] =
2164                 THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2165         s->counter_val[1] =
2166                 THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2167         s->counter_val[2] =
2168                 THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2169         s->counter_val[3] =
2170                 THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2171
2172         /*
2173          * Grab our Virtual Interface resource allocation, extract the
2174          * features that we're interested in and do a bit of sanity testing on
2175          * what we discover.
2176          */
2177         err = t4vf_get_vfres(adapter);
2178         if (err) {
2179                 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2180                         " resources: err=%d\n", err);
2181                 return err;
2182         }
2183
2184         /*
2185          * The number of "ports" which we support is equal to the number of
2186          * Virtual Interfaces with which we've been provisioned.
2187          */
2188         adapter->params.nports = vfres->nvi;
2189         if (adapter->params.nports > MAX_NPORTS) {
2190                 dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2191                          " virtual interfaces\n", MAX_NPORTS,
2192                          adapter->params.nports);
2193                 adapter->params.nports = MAX_NPORTS;
2194         }
2195
2196         /*
2197          * We need to reserve a number of the ingress queues with Free List
2198          * and Interrupt capabilities for special interrupt purposes (like
2199          * asynchronous firmware messages, or forwarded interrupts if we're
2200          * using MSI).  The rest of the FL/Intr-capable ingress queues will be
2201          * matched up one-for-one with Ethernet/Control egress queues in order
2202          * to form "Queue Sets" which will be aportioned between the "ports".
2203          * For each Queue Set, we'll need the ability to allocate two Egress
2204          * Contexts -- one for the Ingress Queue Free List and one for the TX
2205          * Ethernet Queue.
2206          */
2207         ethqsets = vfres->niqflint - INGQ_EXTRAS;
2208         if (vfres->nethctrl != ethqsets) {
2209                 dev_warn(adapter->pdev_dev, "unequal number of [available]"
2210                          " ingress/egress queues (%d/%d); using minimum for"
2211                          " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2212                 ethqsets = min(vfres->nethctrl, ethqsets);
2213         }
2214         if (vfres->neq < ethqsets*2) {
2215                 dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2216                          " to support Queue Sets (%d); reducing allowed Queue"
2217                          " Sets\n", vfres->neq, ethqsets);
2218                 ethqsets = vfres->neq/2;
2219         }
2220         if (ethqsets > MAX_ETH_QSETS) {
2221                 dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2222                          " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2223                 ethqsets = MAX_ETH_QSETS;
2224         }
2225         if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2226                 dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2227                          " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2228         }
2229         adapter->sge.max_ethqsets = ethqsets;
2230
2231         /*
2232          * Check for various parameter sanity issues.  Most checks simply
2233          * result in us using fewer resources than our provissioning but we
2234          * do need at least  one "port" with which to work ...
2235          */
2236         if (adapter->sge.max_ethqsets < adapter->params.nports) {
2237                 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2238                          " virtual interfaces (too few Queue Sets)\n",
2239                          adapter->sge.max_ethqsets, adapter->params.nports);
2240                 adapter->params.nports = adapter->sge.max_ethqsets;
2241         }
2242         if (adapter->params.nports == 0) {
2243                 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2244                         "usable!\n");
2245                 return -EINVAL;
2246         }
2247         return 0;
2248 }
2249
2250 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2251                              u8 pkt_cnt_idx, unsigned int size,
2252                              unsigned int iqe_size)
2253 {
2254         rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2255                              (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2256         rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2257                             ? pkt_cnt_idx
2258                             : 0);
2259         rspq->iqe_len = iqe_size;
2260         rspq->size = size;
2261 }
2262
2263 /*
2264  * Perform default configuration of DMA queues depending on the number and
2265  * type of ports we found and the number of available CPUs.  Most settings can
2266  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2267  * being brought up for the first time.
2268  */
2269 static void __devinit cfg_queues(struct adapter *adapter)
2270 {
2271         struct sge *s = &adapter->sge;
2272         int q10g, n10g, qidx, pidx, qs;
2273         size_t iqe_size;
2274
2275         /*
2276          * We should not be called till we know how many Queue Sets we can
2277          * support.  In particular, this means that we need to know what kind
2278          * of interrupts we'll be using ...
2279          */
2280         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2281
2282         /*
2283          * Count the number of 10GbE Virtual Interfaces that we have.
2284          */
2285         n10g = 0;
2286         for_each_port(adapter, pidx)
2287                 n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2288
2289         /*
2290          * We default to 1 queue per non-10G port and up to # of cores queues
2291          * per 10G port.
2292          */
2293         if (n10g == 0)
2294                 q10g = 0;
2295         else {
2296                 int n1g = (adapter->params.nports - n10g);
2297                 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2298                 if (q10g > num_online_cpus())
2299                         q10g = num_online_cpus();
2300         }
2301
2302         /*
2303          * Allocate the "Queue Sets" to the various Virtual Interfaces.
2304          * The layout will be established in setup_sge_queues() when the
2305          * adapter is brough up for the first time.
2306          */
2307         qidx = 0;
2308         for_each_port(adapter, pidx) {
2309                 struct port_info *pi = adap2pinfo(adapter, pidx);
2310
2311                 pi->first_qset = qidx;
2312                 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
2313                 qidx += pi->nqsets;
2314         }
2315         s->ethqsets = qidx;
2316
2317         /*
2318          * The Ingress Queue Entry Size for our various Response Queues needs
2319          * to be big enough to accommodate the largest message we can receive
2320          * from the chip/firmware; which is 64 bytes ...
2321          */
2322         iqe_size = 64;
2323
2324         /*
2325          * Set up default Queue Set parameters ...  Start off with the
2326          * shortest interrupt holdoff timer.
2327          */
2328         for (qs = 0; qs < s->max_ethqsets; qs++) {
2329                 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2330                 struct sge_eth_txq *txq = &s->ethtxq[qs];
2331
2332                 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2333                 rxq->fl.size = 72;
2334                 txq->q.size = 1024;
2335         }
2336
2337         /*
2338          * The firmware event queue is used for link state changes and
2339          * notifications of TX DMA completions.
2340          */
2341         init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2342
2343         /*
2344          * The forwarded interrupt queue is used when we're in MSI interrupt
2345          * mode.  In this mode all interrupts associated with RX queues will
2346          * be forwarded to a single queue which we'll associate with our MSI
2347          * interrupt vector.  The messages dropped in the forwarded interrupt
2348          * queue will indicate which ingress queue needs servicing ...  This
2349          * queue needs to be large enough to accommodate all of the ingress
2350          * queues which are forwarding their interrupt (+1 to prevent the PIDX
2351          * from equalling the CIDX if every ingress queue has an outstanding
2352          * interrupt).  The queue doesn't need to be any larger because no
2353          * ingress queue will ever have more than one outstanding interrupt at
2354          * any time ...
2355          */
2356         init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2357                   iqe_size);
2358 }
2359
2360 /*
2361  * Reduce the number of Ethernet queues across all ports to at most n.
2362  * n provides at least one queue per port.
2363  */
2364 static void __devinit reduce_ethqs(struct adapter *adapter, int n)
2365 {
2366         int i;
2367         struct port_info *pi;
2368
2369         /*
2370          * While we have too many active Ether Queue Sets, interate across the
2371          * "ports" and reduce their individual Queue Set allocations.
2372          */
2373         BUG_ON(n < adapter->params.nports);
2374         while (n < adapter->sge.ethqsets)
2375                 for_each_port(adapter, i) {
2376                         pi = adap2pinfo(adapter, i);
2377                         if (pi->nqsets > 1) {
2378                                 pi->nqsets--;
2379                                 adapter->sge.ethqsets--;
2380                                 if (adapter->sge.ethqsets <= n)
2381                                         break;
2382                         }
2383                 }
2384
2385         /*
2386          * Reassign the starting Queue Sets for each of the "ports" ...
2387          */
2388         n = 0;
2389         for_each_port(adapter, i) {
2390                 pi = adap2pinfo(adapter, i);
2391                 pi->first_qset = n;
2392                 n += pi->nqsets;
2393         }
2394 }
2395
2396 /*
2397  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2398  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2399  * need.  Minimally we need one for every Virtual Interface plus those needed
2400  * for our "extras".  Note that this process may lower the maximum number of
2401  * allowed Queue Sets ...
2402  */
2403 static int __devinit enable_msix(struct adapter *adapter)
2404 {
2405         int i, err, want, need;
2406         struct msix_entry entries[MSIX_ENTRIES];
2407         struct sge *s = &adapter->sge;
2408
2409         for (i = 0; i < MSIX_ENTRIES; ++i)
2410                 entries[i].entry = i;
2411
2412         /*
2413          * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2414          * plus those needed for our "extras" (for example, the firmware
2415          * message queue).  We _need_ at least one "Queue Set" per Virtual
2416          * Interface plus those needed for our "extras".  So now we get to see
2417          * if the song is right ...
2418          */
2419         want = s->max_ethqsets + MSIX_EXTRAS;
2420         need = adapter->params.nports + MSIX_EXTRAS;
2421         while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
2422                 want = err;
2423
2424         if (err == 0) {
2425                 int nqsets = want - MSIX_EXTRAS;
2426                 if (nqsets < s->max_ethqsets) {
2427                         dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2428                                  " for %d Queue Sets\n", nqsets);
2429                         s->max_ethqsets = nqsets;
2430                         if (nqsets < s->ethqsets)
2431                                 reduce_ethqs(adapter, nqsets);
2432                 }
2433                 for (i = 0; i < want; ++i)
2434                         adapter->msix_info[i].vec = entries[i].vector;
2435         } else if (err > 0) {
2436                 pci_disable_msix(adapter->pdev);
2437                 dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
2438                          " not using MSI-X\n", err);
2439         }
2440         return err;
2441 }
2442
2443 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2444         .ndo_open               = cxgb4vf_open,
2445         .ndo_stop               = cxgb4vf_stop,
2446         .ndo_start_xmit         = t4vf_eth_xmit,
2447         .ndo_get_stats          = cxgb4vf_get_stats,
2448         .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2449         .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2450         .ndo_validate_addr      = eth_validate_addr,
2451         .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2452         .ndo_change_mtu         = cxgb4vf_change_mtu,
2453         .ndo_fix_features       = cxgb4vf_fix_features,
2454         .ndo_set_features       = cxgb4vf_set_features,
2455 #ifdef CONFIG_NET_POLL_CONTROLLER
2456         .ndo_poll_controller    = cxgb4vf_poll_controller,
2457 #endif
2458 };
2459
2460 /*
2461  * "Probe" a device: initialize a device and construct all kernel and driver
2462  * state needed to manage the device.  This routine is called "init_one" in
2463  * the PF Driver ...
2464  */
2465 static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2466                                        const struct pci_device_id *ent)
2467 {
2468         static int version_printed;
2469
2470         int pci_using_dac;
2471         int err, pidx;
2472         unsigned int pmask;
2473         struct adapter *adapter;
2474         struct port_info *pi;
2475         struct net_device *netdev;
2476
2477         /*
2478          * Print our driver banner the first time we're called to initialize a
2479          * device.
2480          */
2481         if (version_printed == 0) {
2482                 printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2483                 version_printed = 1;
2484         }
2485
2486         /*
2487          * Initialize generic PCI device state.
2488          */
2489         err = pci_enable_device(pdev);
2490         if (err) {
2491                 dev_err(&pdev->dev, "cannot enable PCI device\n");
2492                 return err;
2493         }
2494
2495         /*
2496          * Reserve PCI resources for the device.  If we can't get them some
2497          * other driver may have already claimed the device ...
2498          */
2499         err = pci_request_regions(pdev, KBUILD_MODNAME);
2500         if (err) {
2501                 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2502                 goto err_disable_device;
2503         }
2504
2505         /*
2506          * Set up our DMA mask: try for 64-bit address masking first and
2507          * fall back to 32-bit if we can't get 64 bits ...
2508          */
2509         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2510         if (err == 0) {
2511                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2512                 if (err) {
2513                         dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2514                                 " coherent allocations\n");
2515                         goto err_release_regions;
2516                 }
2517                 pci_using_dac = 1;
2518         } else {
2519                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2520                 if (err != 0) {
2521                         dev_err(&pdev->dev, "no usable DMA configuration\n");
2522                         goto err_release_regions;
2523                 }
2524                 pci_using_dac = 0;
2525         }
2526
2527         /*
2528          * Enable bus mastering for the device ...
2529          */
2530         pci_set_master(pdev);
2531
2532         /*
2533          * Allocate our adapter data structure and attach it to the device.
2534          */
2535         adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2536         if (!adapter) {
2537                 err = -ENOMEM;
2538                 goto err_release_regions;
2539         }
2540         pci_set_drvdata(pdev, adapter);
2541         adapter->pdev = pdev;
2542         adapter->pdev_dev = &pdev->dev;
2543
2544         /*
2545          * Initialize SMP data synchronization resources.
2546          */
2547         spin_lock_init(&adapter->stats_lock);
2548
2549         /*
2550          * Map our I/O registers in BAR0.
2551          */
2552         adapter->regs = pci_ioremap_bar(pdev, 0);
2553         if (!adapter->regs) {
2554                 dev_err(&pdev->dev, "cannot map device registers\n");
2555                 err = -ENOMEM;
2556                 goto err_free_adapter;
2557         }
2558
2559         /*
2560          * Initialize adapter level features.
2561          */
2562         adapter->name = pci_name(pdev);
2563         adapter->msg_enable = dflt_msg_enable;
2564         err = adap_init0(adapter);
2565         if (err)
2566                 goto err_unmap_bar;
2567
2568         /*
2569          * Allocate our "adapter ports" and stitch everything together.
2570          */
2571         pmask = adapter->params.vfres.pmask;
2572         for_each_port(adapter, pidx) {
2573                 int port_id, viid;
2574
2575                 /*
2576                  * We simplistically allocate our virtual interfaces
2577                  * sequentially across the port numbers to which we have
2578                  * access rights.  This should be configurable in some manner
2579                  * ...
2580                  */
2581                 if (pmask == 0)
2582                         break;
2583                 port_id = ffs(pmask) - 1;
2584                 pmask &= ~(1 << port_id);
2585                 viid = t4vf_alloc_vi(adapter, port_id);
2586                 if (viid < 0) {
2587                         dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2588                                 " err=%d\n", port_id, viid);
2589                         err = viid;
2590                         goto err_free_dev;
2591                 }
2592
2593                 /*
2594                  * Allocate our network device and stitch things together.
2595                  */
2596                 netdev = alloc_etherdev_mq(sizeof(struct port_info),
2597                                            MAX_PORT_QSETS);
2598                 if (netdev == NULL) {
2599                         dev_err(&pdev->dev, "cannot allocate netdev for"
2600                                 " port %d\n", port_id);
2601                         t4vf_free_vi(adapter, viid);
2602                         err = -ENOMEM;
2603                         goto err_free_dev;
2604                 }
2605                 adapter->port[pidx] = netdev;
2606                 SET_NETDEV_DEV(netdev, &pdev->dev);
2607                 pi = netdev_priv(netdev);
2608                 pi->adapter = adapter;
2609                 pi->pidx = pidx;
2610                 pi->port_id = port_id;
2611                 pi->viid = viid;
2612
2613                 /*
2614                  * Initialize the starting state of our "port" and register
2615                  * it.
2616                  */
2617                 pi->xact_addr_filt = -1;
2618                 netif_carrier_off(netdev);
2619                 netdev->irq = pdev->irq;
2620
2621                 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2622                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2623                         NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM;
2624                 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2625                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2626                         NETIF_F_HIGHDMA;
2627                 netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX;
2628                 if (pci_using_dac)
2629                         netdev->features |= NETIF_F_HIGHDMA;
2630
2631                 netdev->priv_flags |= IFF_UNICAST_FLT;
2632
2633                 netdev->netdev_ops = &cxgb4vf_netdev_ops;
2634                 SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2635
2636                 /*
2637                  * Initialize the hardware/software state for the port.
2638                  */
2639                 err = t4vf_port_init(adapter, pidx);
2640                 if (err) {
2641                         dev_err(&pdev->dev, "cannot initialize port %d\n",
2642                                 pidx);
2643                         goto err_free_dev;
2644                 }
2645         }
2646
2647         /*
2648          * The "card" is now ready to go.  If any errors occur during device
2649          * registration we do not fail the whole "card" but rather proceed
2650          * only with the ports we manage to register successfully.  However we
2651          * must register at least one net device.
2652          */
2653         for_each_port(adapter, pidx) {
2654                 netdev = adapter->port[pidx];
2655                 if (netdev == NULL)
2656                         continue;
2657
2658                 err = register_netdev(netdev);
2659                 if (err) {
2660                         dev_warn(&pdev->dev, "cannot register net device %s,"
2661                                  " skipping\n", netdev->name);
2662                         continue;
2663                 }
2664
2665                 set_bit(pidx, &adapter->registered_device_map);
2666         }
2667         if (adapter->registered_device_map == 0) {
2668                 dev_err(&pdev->dev, "could not register any net devices\n");
2669                 goto err_free_dev;
2670         }
2671
2672         /*
2673          * Set up our debugfs entries.
2674          */
2675         if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2676                 adapter->debugfs_root =
2677                         debugfs_create_dir(pci_name(pdev),
2678                                            cxgb4vf_debugfs_root);
2679                 if (IS_ERR_OR_NULL(adapter->debugfs_root))
2680                         dev_warn(&pdev->dev, "could not create debugfs"
2681                                  " directory");
2682                 else
2683                         setup_debugfs(adapter);
2684         }
2685
2686         /*
2687          * See what interrupts we'll be using.  If we've been configured to
2688          * use MSI-X interrupts, try to enable them but fall back to using
2689          * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2690          * get MSI interrupts we bail with the error.
2691          */
2692         if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2693                 adapter->flags |= USING_MSIX;
2694         else {
2695                 err = pci_enable_msi(pdev);
2696                 if (err) {
2697                         dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2698                                 " err=%d\n",
2699                                 msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2700                         goto err_free_debugfs;
2701                 }
2702                 adapter->flags |= USING_MSI;
2703         }
2704
2705         /*
2706          * Now that we know how many "ports" we have and what their types are,
2707          * and how many Queue Sets we can support, we can configure our queue
2708          * resources.
2709          */
2710         cfg_queues(adapter);
2711
2712         /*
2713          * Print a short notice on the existence and configuration of the new
2714          * VF network device ...
2715          */
2716         for_each_port(adapter, pidx) {
2717                 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2718                          adapter->port[pidx]->name,
2719                          (adapter->flags & USING_MSIX) ? "MSI-X" :
2720                          (adapter->flags & USING_MSI)  ? "MSI" : "");
2721         }
2722
2723         /*
2724          * Return success!
2725          */
2726         return 0;
2727
2728         /*
2729          * Error recovery and exit code.  Unwind state that's been created
2730          * so far and return the error.
2731          */
2732
2733 err_free_debugfs:
2734         if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2735                 cleanup_debugfs(adapter);
2736                 debugfs_remove_recursive(adapter->debugfs_root);
2737         }
2738
2739 err_free_dev:
2740         for_each_port(adapter, pidx) {
2741                 netdev = adapter->port[pidx];
2742                 if (netdev == NULL)
2743                         continue;
2744                 pi = netdev_priv(netdev);
2745                 t4vf_free_vi(adapter, pi->viid);
2746                 if (test_bit(pidx, &adapter->registered_device_map))
2747                         unregister_netdev(netdev);
2748                 free_netdev(netdev);
2749         }
2750
2751 err_unmap_bar:
2752         iounmap(adapter->regs);
2753
2754 err_free_adapter:
2755         kfree(adapter);
2756         pci_set_drvdata(pdev, NULL);
2757
2758 err_release_regions:
2759         pci_release_regions(pdev);
2760         pci_set_drvdata(pdev, NULL);
2761         pci_clear_master(pdev);
2762
2763 err_disable_device:
2764         pci_disable_device(pdev);
2765
2766         return err;
2767 }
2768
2769 /*
2770  * "Remove" a device: tear down all kernel and driver state created in the
2771  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
2772  * that this is called "remove_one" in the PF Driver.)
2773  */
2774 static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2775 {
2776         struct adapter *adapter = pci_get_drvdata(pdev);
2777
2778         /*
2779          * Tear down driver state associated with device.
2780          */
2781         if (adapter) {
2782                 int pidx;
2783
2784                 /*
2785                  * Stop all of our activity.  Unregister network port,
2786                  * disable interrupts, etc.
2787                  */
2788                 for_each_port(adapter, pidx)
2789                         if (test_bit(pidx, &adapter->registered_device_map))
2790                                 unregister_netdev(adapter->port[pidx]);
2791                 t4vf_sge_stop(adapter);
2792                 if (adapter->flags & USING_MSIX) {
2793                         pci_disable_msix(adapter->pdev);
2794                         adapter->flags &= ~USING_MSIX;
2795                 } else if (adapter->flags & USING_MSI) {
2796                         pci_disable_msi(adapter->pdev);
2797                         adapter->flags &= ~USING_MSI;
2798                 }
2799
2800                 /*
2801                  * Tear down our debugfs entries.
2802                  */
2803                 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2804                         cleanup_debugfs(adapter);
2805                         debugfs_remove_recursive(adapter->debugfs_root);
2806                 }
2807
2808                 /*
2809                  * Free all of the various resources which we've acquired ...
2810                  */
2811                 t4vf_free_sge_resources(adapter);
2812                 for_each_port(adapter, pidx) {
2813                         struct net_device *netdev = adapter->port[pidx];
2814                         struct port_info *pi;
2815
2816                         if (netdev == NULL)
2817                                 continue;
2818
2819                         pi = netdev_priv(netdev);
2820                         t4vf_free_vi(adapter, pi->viid);
2821                         free_netdev(netdev);
2822                 }
2823                 iounmap(adapter->regs);
2824                 kfree(adapter);
2825                 pci_set_drvdata(pdev, NULL);
2826         }
2827
2828         /*
2829          * Disable the device and release its PCI resources.
2830          */
2831         pci_disable_device(pdev);
2832         pci_clear_master(pdev);
2833         pci_release_regions(pdev);
2834 }
2835
2836 /*
2837  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2838  * delivery.
2839  */
2840 static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2841 {
2842         struct adapter *adapter;
2843         int pidx;
2844
2845         adapter = pci_get_drvdata(pdev);
2846         if (!adapter)
2847                 return;
2848
2849         /*
2850          * Disable all Virtual Interfaces.  This will shut down the
2851          * delivery of all ingress packets into the chip for these
2852          * Virtual Interfaces.
2853          */
2854         for_each_port(adapter, pidx) {
2855                 struct net_device *netdev;
2856                 struct port_info *pi;
2857
2858                 if (!test_bit(pidx, &adapter->registered_device_map))
2859                         continue;
2860
2861                 netdev = adapter->port[pidx];
2862                 if (!netdev)
2863                         continue;
2864
2865                 pi = netdev_priv(netdev);
2866                 t4vf_enable_vi(adapter, pi->viid, false, false);
2867         }
2868
2869         /*
2870          * Free up all Queues which will prevent further DMA and
2871          * Interrupts allowing various internal pathways to drain.
2872          */
2873         t4vf_free_sge_resources(adapter);
2874 }
2875
2876 /*
2877  * PCI Device registration data structures.
2878  */
2879 #define CH_DEVICE(devid, idx) \
2880         { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2881
2882 static struct pci_device_id cxgb4vf_pci_tbl[] = {
2883         CH_DEVICE(0xb000, 0),   /* PE10K FPGA */
2884         CH_DEVICE(0x4800, 0),   /* T440-dbg */
2885         CH_DEVICE(0x4801, 0),   /* T420-cr */
2886         CH_DEVICE(0x4802, 0),   /* T422-cr */
2887         CH_DEVICE(0x4803, 0),   /* T440-cr */
2888         CH_DEVICE(0x4804, 0),   /* T420-bch */
2889         CH_DEVICE(0x4805, 0),   /* T440-bch */
2890         CH_DEVICE(0x4806, 0),   /* T460-ch */
2891         CH_DEVICE(0x4807, 0),   /* T420-so */
2892         CH_DEVICE(0x4808, 0),   /* T420-cx */
2893         CH_DEVICE(0x4809, 0),   /* T420-bt */
2894         CH_DEVICE(0x480a, 0),   /* T404-bt */
2895         { 0, }
2896 };
2897
/*
 * Module metadata: description, author, license and version strings, plus
 * the export of the PCI ID table declared above.
 */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2903
2904 static struct pci_driver cxgb4vf_driver = {
2905         .name           = KBUILD_MODNAME,
2906         .id_table       = cxgb4vf_pci_tbl,
2907         .probe          = cxgb4vf_pci_probe,
2908         .remove         = __devexit_p(cxgb4vf_pci_remove),
2909         .shutdown       = __devexit_p(cxgb4vf_pci_shutdown),
2910 };
2911
2912 /*
2913  * Initialize global driver state.
2914  */
2915 static int __init cxgb4vf_module_init(void)
2916 {
2917         int ret;
2918
2919         /*
2920          * Vet our module parameters.
2921          */
2922         if (msi != MSI_MSIX && msi != MSI_MSI) {
2923                 printk(KERN_WARNING KBUILD_MODNAME
2924                        ": bad module parameter msi=%d; must be %d"
2925                        " (MSI-X or MSI) or %d (MSI)\n",
2926                        msi, MSI_MSIX, MSI_MSI);
2927                 return -EINVAL;
2928         }
2929
2930         /* Debugfs support is optional, just warn if this fails */
2931         cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2932         if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2933                 printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2934                        " debugfs entry, continuing\n");
2935
2936         ret = pci_register_driver(&cxgb4vf_driver);
2937         if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2938                 debugfs_remove(cxgb4vf_debugfs_root);
2939         return ret;
2940 }
2941
2942 /*
2943  * Tear down global driver state.
2944  */
2945 static void __exit cxgb4vf_module_exit(void)
2946 {
2947         pci_unregister_driver(&cxgb4vf_driver);
2948         debugfs_remove(cxgb4vf_debugfs_root);
2949 }
2950
/* Hook the init and exit routines above up as the module entry points. */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);