drivers/net/cxgb4vf/cxgb4vf_main.c

   1 /*
   2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
   3  * driver for Linux.
   4  *
   5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
   6  *
   7  * This software is available to you under a choice of one of two
   8  * licenses.  You may choose to be licensed under the terms of the GNU
   9  * General Public License (GPL) Version 2, available from the file
  10  * COPYING in the main directory of this source tree, or the
  11  * OpenIB.org BSD license below:
  12  *
  13  *     Redistribution and use in source and binary forms, with or
  14  *     without modification, are permitted provided that the following
  15  *     conditions are met:
  16  *
  17  *      - Redistributions of source code must retain the above
  18  *        copyright notice, this list of conditions and the following
  19  *        disclaimer.
  20  *
  21  *      - Redistributions in binary form must reproduce the above
  22  *        copyright notice, this list of conditions and the following
  23  *        disclaimer in the documentation and/or other materials
  24  *        provided with the distribution.
  25  *
  26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  33  * SOFTWARE.
  34  */
  35
  36 #include <linux/module.h>
  37 #include <linux/moduleparam.h>
  38 #include <linux/init.h>
  39 #include <linux/pci.h>
  40 #include <linux/dma-mapping.h>
  41 #include <linux/netdevice.h>
  42 #include <linux/etherdevice.h>
  43 #include <linux/debugfs.h>
  44 #include <linux/ethtool.h>
  45
  46 #include "t4vf_common.h"
  47 #include "t4vf_defs.h"
  48
  49 #include "../cxgb4/t4_regs.h"
  50 #include "../cxgb4/t4_msg.h"
  51
  52 /*
  53  * Generic information about the driver.
  54  */
  55 #define DRV_VERSION "1.0.0"
  56 #define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver"
  57
  58 /*
  59  * Module Parameters.
  60  * ==================
  61  */
  62
  63 /*
  64  * Default ethtool "message level" for adapters.
  65  */
  66 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
  67                          NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
  68                          NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
  69
  70 static int dflt_msg_enable = DFLT_MSG_ENABLE;
  71
  72 module_param(dflt_msg_enable, int, 0644);
  73 MODULE_PARM_DESC(dflt_msg_enable,
  74                  "default adapter ethtool message level bitmap");
  75
  76 /*
  77  * The driver uses the best interrupt scheme available on a platform in the
  78  * order MSI-X then MSI.  This parameter determines which of these schemes the
  79  * driver may consider as follows:
  80  *
  81  *     msi = 2: choose from among MSI-X and MSI
  82  *     msi = 1: only consider MSI interrupts
  83  *
  84  * Note that unlike the Physical Function driver, this Virtual Function driver
  85  * does _not_ support legacy INTx interrupts (this limitation is mandated by
  86  * the PCI-E SR-IOV standard).
  87  */
  88 #define MSI_MSIX        2
  89 #define MSI_MSI         1
  90 #define MSI_DEFAULT     MSI_MSIX
  91
  92 static int msi = MSI_DEFAULT;
  93
  94 module_param(msi, int, 0644);
  95 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
  96
  97 /*
  98  * Fundamental constants.
  99  * ======================
 100  */
 101
 102 enum {
 103         MAX_TXQ_ENTRIES         = 16384,
 104         MAX_RSPQ_ENTRIES        = 16384,
 105         MAX_RX_BUFFERS          = 16384,
 106
 107         MIN_TXQ_ENTRIES         = 32,
 108         MIN_RSPQ_ENTRIES        = 128,
 109         MIN_FL_ENTRIES          = 16,
 110
 111         /*
 112          * For purposes of manipulating the Free List size we need to
 113          * recognize that Free Lists are actually Egress Queues (the host
 114          * produces free buffers which the hardware consumes), Egress Queues
 115          * indices are all in units of Egress Context Units bytes, and free
 116          * list entries are 64-bit PCI DMA addresses.  And since the state of
 117          * the Producer Index == the Consumer Index implies an EMPTY list, we
 118          * always have at least one Egress Unit's worth of Free List entries
 119          * unused.  See sge.c for more details ...
 120          */
 121         EQ_UNIT = SGE_EQ_IDXSIZE,
 122         FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
 123         MIN_FL_RESID = FL_PER_EQ_UNIT,
 124 };
 125
 126 /*
 127  * Global driver state.
 128  * ====================
 129  */
 130
 131 static struct dentry *cxgb4vf_debugfs_root;
 132
 133 /*
 134  * OS "Callback" functions.
 135  * ========================
 136  */
 137
 138 /*
 139  * The link status has changed on the indicated "port" (Virtual Interface).
 140  */
 141 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
 142 {
 143         struct net_device *dev = adapter->port[pidx];
 144
 145         /*
 146          * If the port is disabled or the current recorded "link up"
 147          * status matches the new status, just return.
 148          */
 149         if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
 150                 return;
 151
 152         /*
 153          * Tell the OS that the link status has changed and print a short
 154          * informative message on the console about the event.
 155          */
 156         if (link_ok) {
 157                 const char *s;
 158                 const char *fc;
 159                 const struct port_info *pi = netdev_priv(dev);
 160
 161                 netif_carrier_on(dev);
 162
 163                 switch (pi->link_cfg.speed) {
 164                 case SPEED_10000:
 165                         s = "10Gbps";
 166                         break;
 167
 168                 case SPEED_1000:
 169                         s = "1000Mbps";
 170                         break;
 171
 172                 case SPEED_100:
 173                         s = "100Mbps";
 174                         break;
 175
 176                 default:
 177                         s = "unknown";
 178                         break;
 179                 }
 180
 181                 switch (pi->link_cfg.fc) {
 182                 case PAUSE_RX:
 183                         fc = "RX";
 184                         break;
 185
 186                 case PAUSE_TX:
 187                         fc = "TX";
 188                         break;
 189
 190                 case PAUSE_RX|PAUSE_TX:
 191                         fc = "RX/TX";
 192                         break;
 193
 194                 default:
 195                         fc = "no";
 196                         break;
 197                 }
 198
 199                 printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
 200                        dev->name, s, fc);
 201         } else {
 202                 netif_carrier_off(dev);
 203                 printk(KERN_INFO "%s: link down\n", dev->name);
 204         }
 205 }
 206
 207 /*
 208  * Net device operations.
 209  * ======================
 210  */
 211
 212 /*
 213  * Record our new VLAN Group and enable/disable hardware VLAN Tag extraction
 214  * based on whether the specified VLAN Group pointer is NULL or not.
 215  */
 216 static void cxgb4vf_vlan_rx_register(struct net_device *dev,
 217                                      struct vlan_group *grp)
 218 {
 219         struct port_info *pi = netdev_priv(dev);
 220
 221         pi->vlan_grp = grp;
 222         t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, grp != NULL, 0);
 223 }
 224
 225 /*
 226  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 227  * Interface).
 228  */
 229 static int link_start(struct net_device *dev)
 230 {
 231         int ret;
 232         struct port_info *pi = netdev_priv(dev);
 233
 234         /*
 235          * We do not set address filters and promiscuity here, the stack does
 236          * that step explicitly.
 237          */
 238         ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, -1,
 239                               true);
 240         if (ret == 0) {
 241                 ret = t4vf_change_mac(pi->adapter, pi->viid,
 242                                       pi->xact_addr_filt, dev->dev_addr, true);
 243                 if (ret >= 0) {
 244                         pi->xact_addr_filt = ret;
 245                         ret = 0;
 246                 }
 247         }
 248
 249         /*
 250          * We don't need to actually "start the link" itself since the
 251          * firmware will do that for us when the first Virtual Interface
 252          * is enabled on a port.
 253          */
 254         if (ret == 0)
 255                 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
 256         return ret;
 257 }
 258
 259 /*
 260  * Name the MSI-X interrupts.
 261  */
 262 static void name_msix_vecs(struct adapter *adapter)
 263 {
 264         int namelen = sizeof(adapter->msix_info[0].desc) - 1;
 265         int pidx;
 266
 267         /*
 268          * Firmware events.
 269          */
 270         snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
 271                  "%s-FWeventq", adapter->name);
 272         adapter->msix_info[MSIX_FW].desc[namelen] = 0;
 273
 274         /*
 275          * Ethernet queues.
 276          */
 277         for_each_port(adapter, pidx) {
 278                 struct net_device *dev = adapter->port[pidx];
 279                 const struct port_info *pi = netdev_priv(dev);
 280                 int qs, msi;
 281
 282                 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
 283                         snprintf(adapter->msix_info[msi].desc, namelen,
 284                                  "%s-%d", dev->name, qs);
 285                         adapter->msix_info[msi].desc[namelen] = 0;
 286                 }
 287         }
 288 }
 289
 290 /*
 291  * Request all of our MSI-X resources.
 292  */
 293 static int request_msix_queue_irqs(struct adapter *adapter)
 294 {
 295         struct sge *s = &adapter->sge;
 296         int rxq, msi, err;
 297
 298         /*
 299          * Firmware events.
 300          */
 301         err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
 302                           0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
 303         if (err)
 304                 return err;
 305
 306         /*
 307          * Ethernet queues.
 308          */
 309         msi = MSIX_IQFLINT;
 310         for_each_ethrxq(s, rxq) {
 311                 err = request_irq(adapter->msix_info[msi].vec,
 312                                   t4vf_sge_intr_msix, 0,
 313                                   adapter->msix_info[msi].desc,
 314                                   &s->ethrxq[rxq].rspq);
 315                 if (err)
 316                         goto err_free_irqs;
 317                 msi++;
 318         }
 319         return 0;
 320
 321 err_free_irqs:
 322         while (--rxq >= 0)
 323                 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
 324         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 325         return err;
 326 }
 327
 328 /*
 329  * Free our MSI-X resources.
 330  */
 331 static void free_msix_queue_irqs(struct adapter *adapter)
 332 {
 333         struct sge *s = &adapter->sge;
 334         int rxq, msi;
 335
 336         free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
 337         msi = MSIX_IQFLINT;
 338         for_each_ethrxq(s, rxq)
 339                 free_irq(adapter->msix_info[msi++].vec,
 340                          &s->ethrxq[rxq].rspq);
 341 }
 342
 343 /*
 344  * Turn on NAPI and start up interrupts on a response queue.
 345  */
 346 static void qenable(struct sge_rspq *rspq)
 347 {
 348         napi_enable(&rspq->napi);
 349
 350         /*
 351          * 0-increment the Going To Sleep register to start the timer and
 352          * enable interrupts.
 353          */
 354         t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 355                      CIDXINC(0) |
 356                      SEINTARM(rspq->intr_params) |
 357                      INGRESSQID(rspq->cntxt_id));
 358 }
 359
 360 /*
 361  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 362  */
 363 static void enable_rx(struct adapter *adapter)
 364 {
 365         int rxq;
 366         struct sge *s = &adapter->sge;
 367
 368         for_each_ethrxq(s, rxq)
 369                 qenable(&s->ethrxq[rxq].rspq);
 370         qenable(&s->fw_evtq);
 371
 372         /*
 373          * The interrupt queue doesn't use NAPI so we do the 0-increment of
 374          * its Going To Sleep register here to get it started.
 375          */
 376         if (adapter->flags & USING_MSI)
 377                 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
 378                              CIDXINC(0) |
 379                              SEINTARM(s->intrq.intr_params) |
 380                              INGRESSQID(s->intrq.cntxt_id));
 381
 382 }
 383
 384 /*
 385  * Wait until all NAPI handlers are descheduled.
 386  */
 387 static void quiesce_rx(struct adapter *adapter)
 388 {
 389         struct sge *s = &adapter->sge;
 390         int rxq;
 391
 392         for_each_ethrxq(s, rxq)
 393                 napi_disable(&s->ethrxq[rxq].rspq.napi);
 394         napi_disable(&s->fw_evtq.napi);
 395 }
 396
 397 /*
 398  * Response queue handler for the firmware event queue.
 399  */
 400 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
 401                           const struct pkt_gl *gl)
 402 {
 403         /*
 404          * Extract response opcode and get pointer to CPL message body.
 405          */
 406         struct adapter *adapter = rspq->adapter;
 407         u8 opcode = ((const struct rss_header *)rsp)->opcode;
 408         void *cpl = (void *)(rsp + 1);
 409
 410         switch (opcode) {
 411         case CPL_FW6_MSG: {
 412                 /*
 413                  * We've received an asynchronous message from the firmware.
 414                  */
 415                 const struct cpl_fw6_msg *fw_msg = cpl;
 416                 if (fw_msg->type == FW6_TYPE_CMD_RPL)
 417                         t4vf_handle_fw_rpl(adapter, fw_msg->data);
 418                 break;
 419         }
 420
 421         case CPL_SGE_EGR_UPDATE: {
 422                 /*
 423                  * We've received an Egress Queue Status Update message.  We
 424                  * get these, if the SGE is configured to send these when the
 425                  * firmware passes certain points in processing our TX
 426                  * Ethernet Queue or if we make an explicit request for one.
 427                  * We use these updates to determine when we may need to
 428                  * restart a TX Ethernet Queue which was stopped for lack of
 429                  * free TX Queue Descriptors ...
 430                  */
 431                 const struct cpl_sge_egr_update *p = (void *)cpl;
 432                 unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
 433                 struct sge *s = &adapter->sge;
 434                 struct sge_txq *tq;
 435                 struct sge_eth_txq *txq;
 436                 unsigned int eq_idx;
 437
 438                 /*
 439                  * Perform sanity checking on the Queue ID to make sure it
 440                  * really refers to one of our TX Ethernet Egress Queues which
 441                  * is active and matches the queue's ID.  None of these error
 442                  * conditions should ever happen so we may want to either make
 443                  * them fatal and/or conditionalized under DEBUG.
 444                  */
 445                 eq_idx = EQ_IDX(s, qid);
 446                 if (unlikely(eq_idx >= MAX_EGRQ)) {
 447                         dev_err(adapter->pdev_dev,
 448                                 "Egress Update QID %d out of range\n", qid);
 449                         break;
 450                 }
 451                 tq = s->egr_map[eq_idx];
 452                 if (unlikely(tq == NULL)) {
 453                         dev_err(adapter->pdev_dev,
 454                                 "Egress Update QID %d TXQ=NULL\n", qid);
 455                         break;
 456                 }
 457                 txq = container_of(tq, struct sge_eth_txq, q);
 458                 if (unlikely(tq->abs_id != qid)) {
 459                         dev_err(adapter->pdev_dev,
 460                                 "Egress Update QID %d refers to TXQ %d\n",
 461                                 qid, tq->abs_id);
 462                         break;
 463                 }
 464
 465                 /*
 466                  * Restart a stopped TX Queue which has less than half of its
 467                  * TX ring in use ...
 468                  */
 469                 txq->q.restarts++;
 470                 netif_tx_wake_queue(txq->txq);
 471                 break;
 472         }
 473
 474         default:
 475                 dev_err(adapter->pdev_dev,
 476                         "unexpected CPL %#x on FW event queue\n", opcode);
 477         }
 478
 479         return 0;
 480 }
 481
 482 /*
 483  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
 484  * to use and initializes them.  We support multiple "Queue Sets" per port if
 485  * we have MSI-X, otherwise just one queue set per port.
 486  */
 487 static int setup_sge_queues(struct adapter *adapter)
 488 {
 489         struct sge *s = &adapter->sge;
 490         int err, pidx, msix;
 491
 492         /*
 493          * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
 494          * state.
 495          */
 496         bitmap_zero(s->starving_fl, MAX_EGRQ);
 497
 498         /*
 499          * If we're using MSI interrupt mode we need to set up a "forwarded
 500          * interrupt" queue which we'll set up with our MSI vector.  The rest
 501          * of the ingress queues will be set up to forward their interrupts to
 502          * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
 503          * the intrq's queue ID as the interrupt forwarding queue for the
 504          * subsequent calls ...
 505          */
 506         if (adapter->flags & USING_MSI) {
 507                 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
 508                                          adapter->port[0], 0, NULL, NULL);
 509                 if (err)
 510                         goto err_free_queues;
 511         }
 512
 513         /*
 514          * Allocate our ingress queue for asynchronous firmware messages.
 515          */
 516         err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
 517                                  MSIX_FW, NULL, fwevtq_handler);
 518         if (err)
 519                 goto err_free_queues;
 520
 521         /*
 522          * Allocate each "port"'s initial Queue Sets.  These can be changed
 523          * later on ... up to the point where any interface on the adapter is
 524          * brought up at which point lots of things get nailed down
 525          * permanently ...
 526          */
 527         msix = MSIX_IQFLINT;
 528         for_each_port(adapter, pidx) {
 529                 struct net_device *dev = adapter->port[pidx];
 530                 struct port_info *pi = netdev_priv(dev);
 531                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 532                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 533                 int qs;
 534
 535                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 536                         err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
 537                                                  dev, msix++,
 538                                                  &rxq->fl, t4vf_ethrx_handler);
 539                         if (err)
 540                                 goto err_free_queues;
 541
 542                         err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
 543                                              netdev_get_tx_queue(dev, qs),
 544                                              s->fw_evtq.cntxt_id);
 545                         if (err)
 546                                 goto err_free_queues;
 547
 548                         rxq->rspq.idx = qs;
 549                         memset(&rxq->stats, 0, sizeof(rxq->stats));
 550                 }
 551         }
 552
 553         /*
 554          * Create the reverse mappings for the queues.
 555          */
 556         s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
 557         s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
 558         IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
 559         for_each_port(adapter, pidx) {
 560                 struct net_device *dev = adapter->port[pidx];
 561                 struct port_info *pi = netdev_priv(dev);
 562                 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
 563                 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
 564                 int qs;
 565
 566                 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
 567                         IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
 568                         EQ_MAP(s, txq->q.abs_id) = &txq->q;
 569
 570                         /*
 571                          * The FW_IQ_CMD doesn't return the Absolute Queue IDs
 572                          * for Free Lists but since all of the Egress Queues
 573                          * (including Free Lists) have Relative Queue IDs
 574                          * which are computed as Absolute - Base Queue ID, we
 575                          * can synthesize the Absolute Queue IDs for the Free
 576                          * Lists.  This is useful for debugging purposes when
 577                          * we want to dump Queue Contexts via the PF Driver.
 578                          */
 579                         rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
 580                         EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
 581                 }
 582         }
 583         return 0;
 584
 585 err_free_queues:
 586         t4vf_free_sge_resources(adapter);
 587         return err;
 588 }
 589
 590 /*
 591  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
 592  * queues.  We configure the RSS CPU lookup table to distribute to the number
 593  * of HW receive queues, and the response queue lookup table to narrow that
 594  * down to the response queues actually configured for each "port" (Virtual
 595  * Interface).  We always configure the RSS mapping for all ports since the
 596  * mapping table has plenty of entries.
 597  */
 598 static int setup_rss(struct adapter *adapter)
 599 {
 600         int pidx;
 601
 602         for_each_port(adapter, pidx) {
 603                 struct port_info *pi = adap2pinfo(adapter, pidx);
 604                 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
 605                 u16 rss[MAX_PORT_QSETS];
 606                 int qs, err;
 607
 608                 for (qs = 0; qs < pi->nqsets; qs++)
 609                         rss[qs] = rxq[qs].rspq.abs_id;
 610
 611                 err = t4vf_config_rss_range(adapter, pi->viid,
 612                                             0, pi->rss_size, rss, pi->nqsets);
 613                 if (err)
 614                         return err;
 615
 616                 /*
 617                  * Perform Global RSS Mode-specific initialization.
 618                  */
 619                 switch (adapter->params.rss.mode) {
 620                 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
 621                         /*
 622                          * If Tunnel All Lookup isn't specified in the global
 623                          * RSS Configuration, then we need to specify a
 624                          * default Ingress Queue for any ingress packets which
 625                          * aren't hashed.  We'll use our first ingress queue
 626                          * ...
 627                          */
 628                         if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
 629                                 union rss_vi_config config;
 630                                 err = t4vf_read_rss_vi_config(adapter,
 631                                                               pi->viid,
 632                                                               &config);
 633                                 if (err)
 634                                         return err;
 635                                 config.basicvirtual.defaultq =
 636                                         rxq[0].rspq.abs_id;
 637                                 err = t4vf_write_rss_vi_config(adapter,
 638                                                                pi->viid,
 639                                                                &config);
 640                                 if (err)
 641                                         return err;
 642                         }
 643                         break;
 644                 }
 645         }
 646
 647         return 0;
 648 }
 649
 650 /*
 651  * Bring the adapter up.  Called whenever we go from no "ports" open to having
 652  * one open.  This function performs the actions necessary to make an adapter
 653  * operational, such as completing the initialization of HW modules, and
 654  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 655  * this is called "cxgb_up" in the PF Driver.)
 656  */
 657 static int adapter_up(struct adapter *adapter)
 658 {
 659         int err;
 660
 661         /*
 662          * If this is the first time we've been called, perform basic
 663          * adapter setup.  Once we've done this, many of our adapter
 664          * parameters can no longer be changed ...
 665          */
 666         if ((adapter->flags & FULL_INIT_DONE) == 0) {
 667                 err = setup_sge_queues(adapter);
 668                 if (err)
 669                         return err;
 670                 err = setup_rss(adapter);
 671                 if (err) {
 672                         t4vf_free_sge_resources(adapter);
 673                         return err;
 674                 }
 675
 676                 if (adapter->flags & USING_MSIX)
 677                         name_msix_vecs(adapter);
 678                 adapter->flags |= FULL_INIT_DONE;
 679         }
 680
 681         /*
 682          * Acquire our interrupt resources.  We only support MSI-X and MSI.
 683          */
 684         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
 685         if (adapter->flags & USING_MSIX)
 686                 err = request_msix_queue_irqs(adapter);
 687         else
 688                 err = request_irq(adapter->pdev->irq,
 689                                   t4vf_intr_handler(adapter), 0,
 690                                   adapter->name, adapter);
 691         if (err) {
 692                 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
 693                         err);
 694                 return err;
 695         }
 696
 697         /*
 698          * Enable NAPI ingress processing and return success.
 699          */
 700         enable_rx(adapter);
 701         t4vf_sge_start(adapter);
 702         return 0;
 703 }
 704
 705 /*
 706  * Bring the adapter down.  Called whenever the last "port" (Virtual
 707  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 708  * Driver.)
 709  */
 710 static void adapter_down(struct adapter *adapter)
 711 {
 712         /*
 713          * Free interrupt resources.
 714          */
 715         if (adapter->flags & USING_MSIX)
 716                 free_msix_queue_irqs(adapter);
 717         else
 718                 free_irq(adapter->pdev->irq, adapter);
 719
 720         /*
 721          * Wait for NAPI handlers to finish.
 722          */
 723         quiesce_rx(adapter);
 724 }
 725
 726 /*
 727  * Start up a net device.
 728  */
 729 static int cxgb4vf_open(struct net_device *dev)
 730 {
 731         int err;
 732         struct port_info *pi = netdev_priv(dev);
 733         struct adapter *adapter = pi->adapter;
 734
 735         /*
 736          * If this is the first interface that we're opening on the "adapter",
 737          * bring the "adapter" up now.
 738          */
 739         if (adapter->open_device_map == 0) {
 740                 err = adapter_up(adapter);
 741                 if (err)
 742                         return err;
 743         }
 744
 745         /*
 746          * Note that this interface is up and start everything up ...
 747          */
 748         netif_set_real_num_tx_queues(dev, pi->nqsets);
 749         err = netif_set_real_num_rx_queues(dev, pi->nqsets);
 750         if (err)
 751                 goto err_unwind;
 752         err = link_start(dev);
 753         if (err)
 754                 goto err_unwind;
 755
 756         netif_tx_start_all_queues(dev);
 757         set_bit(pi->port_id, &adapter->open_device_map);
 758         return 0;
 759
 760 err_unwind:
 761         if (adapter->open_device_map == 0)
 762                 adapter_down(adapter);
 763         return err;
 764 }
 765
 766 /*
 767  * Shut down a net device.  This routine is called "cxgb_close" in the PF
 768  * Driver ...
 769  */
 770 static int cxgb4vf_stop(struct net_device *dev)
 771 {
 772         struct port_info *pi = netdev_priv(dev);
 773         struct adapter *adapter = pi->adapter;
 774
 775         netif_tx_stop_all_queues(dev);
 776         netif_carrier_off(dev);
 777         t4vf_enable_vi(adapter, pi->viid, false, false);
 778         pi->link_cfg.link_ok = 0;
 779
 780         clear_bit(pi->port_id, &adapter->open_device_map);
 781         if (adapter->open_device_map == 0)
 782                 adapter_down(adapter);
 783         return 0;
 784 }
 785
 786 /*
 787  * Translate our basic statistics into the standard "ifconfig" statistics.
 788  */
 789 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
 790 {
 791         struct t4vf_port_stats stats;
 792         struct port_info *pi = netdev2pinfo(dev);
 793         struct adapter *adapter = pi->adapter;
 794         struct net_device_stats *ns = &dev->stats;
 795         int err;
 796
 797         spin_lock(&adapter->stats_lock);
 798         err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
 799         spin_unlock(&adapter->stats_lock);
 800
 801         memset(ns, 0, sizeof(*ns));
 802         if (err)
 803                 return ns;
 804
 805         ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
 806                         stats.tx_ucast_bytes + stats.tx_offload_bytes);
 807         ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
 808                           stats.tx_ucast_frames + stats.tx_offload_frames);
 809         ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
 810                         stats.rx_ucast_bytes);
 811         ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
 812                           stats.rx_ucast_frames);
 813         ns->multicast = stats.rx_mcast_frames;
 814         ns->tx_errors = stats.tx_drop_frames;
 815         ns->rx_errors = stats.rx_err_frames;
 816
 817         return ns;
 818 }
 819
 820 /*
 821  * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
 822  * at a specified offset within the list, into an array of addrss pointers and
 823  * return the number collected.
 824  */
 825 static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
 826                                                         const u8 **addr,
 827                                                         unsigned int offset,
 828                                                         unsigned int maxaddrs)
 829 {
 830         unsigned int index = 0;
 831         unsigned int naddr = 0;
 832         const struct netdev_hw_addr *ha;
 833
 834         for_each_dev_addr(dev, ha)
 835                 if (index++ >= offset) {
 836                         addr[naddr++] = ha->addr;
 837                         if (naddr >= maxaddrs)
 838                                 break;
 839                 }
 840         return naddr;
 841 }
 842
 843 /*
 844  * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
 845  * at a specified offset within the list, into an array of addrss pointers and
 846  * return the number collected.
 847  */
 848 static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
 849                                                         const u8 **addr,
 850                                                         unsigned int offset,
 851                                                         unsigned int maxaddrs)
 852 {
 853         unsigned int index = 0;
 854         unsigned int naddr = 0;
 855         const struct netdev_hw_addr *ha;
 856
 857         netdev_for_each_mc_addr(ha, dev)
 858                 if (index++ >= offset) {
 859                         addr[naddr++] = ha->addr;
 860                         if (naddr >= maxaddrs)
 861                                 break;
 862                 }
 863         return naddr;
 864 }
 865
 866 /*
 867  * Configure the exact and hash address filters to handle a port's multicast
 868  * and secondary unicast MAC addresses.
 869  */
 870 static int set_addr_filters(const struct net_device *dev, bool sleep)
 871 {
 872         u64 mhash = 0;
 873         u64 uhash = 0;
 874         bool free = true;
 875         unsigned int offset, naddr;
 876         const u8 *addr[7];
 877         int ret;
 878         const struct port_info *pi = netdev_priv(dev);
 879
 880         /* first do the secondary unicast addresses */
 881         for (offset = 0; ; offset += naddr) {
 882                 naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
 883                                                      ARRAY_SIZE(addr));
 884                 if (naddr == 0)
 885                         break;
 886
 887                 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
 888                                           naddr, addr, NULL, &uhash, sleep);
 889                 if (ret < 0)
 890                         return ret;
 891
 892                 free = false;
 893         }
 894
 895         /* next set up the multicast addresses */
 896         for (offset = 0; ; offset += naddr) {
 897                 naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
 898                                                      ARRAY_SIZE(addr));
 899                 if (naddr == 0)
 900                         break;
 901
 902                 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
 903                                           naddr, addr, NULL, &mhash, sleep);
 904                 if (ret < 0)
 905                         return ret;
 906                 free = false;
 907         }
 908
 909         return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
 910                                   uhash | mhash, sleep);
 911 }
 912
 913 /*
 914  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
 915  * If @mtu is -1 it is left unchanged.
 916  */
 917 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
 918 {
 919         int ret;
 920         struct port_info *pi = netdev_priv(dev);
 921
 922         ret = set_addr_filters(dev, sleep_ok);
 923         if (ret == 0)
 924                 ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
 925                                       (dev->flags & IFF_PROMISC) != 0,
 926                                       (dev->flags & IFF_ALLMULTI) != 0,
 927                                       1, -1, sleep_ok);
 928         return ret;
 929 }
 930
 931 /*
 932  * Set the current receive modes on the device.
 933  */
 934 static void cxgb4vf_set_rxmode(struct net_device *dev)
 935 {
 936         /* unfortunately we can't return errors to the stack */
 937         set_rxmode(dev, -1, false);
 938 }
 939
 940 /*
 941  * Find the entry in the interrupt holdoff timer value array which comes
 942  * closest to the specified interrupt holdoff value.
 943  */
 944 static int closest_timer(const struct sge *s, int us)
 945 {
 946         int i, timer_idx = 0, min_delta = INT_MAX;
 947
 948         for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
 949                 int delta = us - s->timer_val[i];
 950                 if (delta < 0)
 951                         delta = -delta;
 952                 if (delta < min_delta) {
 953                         min_delta = delta;
 954                         timer_idx = i;
 955                 }
 956         }
 957         return timer_idx;
 958 }
 959
 960 static int closest_thres(const struct sge *s, int thres)
 961 {
 962         int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
 963
 964         for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
 965                 delta = thres - s->counter_val[i];
 966                 if (delta < 0)
 967                         delta = -delta;
 968                 if (delta < min_delta) {
 969                         min_delta = delta;
 970                         pktcnt_idx = i;
 971                 }
 972         }
 973         return pktcnt_idx;
 974 }
 975
 976 /*
 977  * Return a queue's interrupt hold-off time in us.  0 means no timer.
 978  */
 979 static unsigned int qtimer_val(const struct adapter *adapter,
 980                                const struct sge_rspq *rspq)
 981 {
 982         unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
 983
 984         return timer_idx < SGE_NTIMERS
 985                 ? adapter->sge.timer_val[timer_idx]
 986                 : 0;
 987 }
 988
 989 /**
 990  *      set_rxq_intr_params - set a queue's interrupt holdoff parameters
 991  *      @adapter: the adapter
 992  *      @rspq: the RX response queue
 993  *      @us: the hold-off time in us, or 0 to disable timer
 994  *      @cnt: the hold-off packet count, or 0 to disable counter
 995  *
 996  *      Sets an RX response queue's interrupt hold-off time and packet count.
 997  *      At least one of the two needs to be enabled for the queue to generate
 998  *      interrupts.
 999  */
1000 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1001                                unsigned int us, unsigned int cnt)
1002 {
1003         unsigned int timer_idx;
1004
1005         /*
1006          * If both the interrupt holdoff timer and count are specified as
1007          * zero, default to a holdoff count of 1 ...
1008          */
1009         if ((us | cnt) == 0)
1010                 cnt = 1;
1011
1012         /*
1013          * If an interrupt holdoff count has been specified, then find the
1014          * closest configured holdoff count and use that.  If the response
1015          * queue has already been created, then update its queue context
1016          * parameters ...
1017          */
1018         if (cnt) {
1019                 int err;
1020                 u32 v, pktcnt_idx;
1021
1022                 pktcnt_idx = closest_thres(&adapter->sge, cnt);
1023                 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1024                         v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1025                             FW_PARAMS_PARAM_X(
1026                                         FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1027                             FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1028                         err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1029                         if (err)
1030                                 return err;
1031                 }
1032                 rspq->pktcnt_idx = pktcnt_idx;
1033         }
1034
1035         /*
1036          * Compute the closest holdoff timer index from the supplied holdoff
1037          * timer value.
1038          */
1039         timer_idx = (us == 0
1040                      ? SGE_TIMER_RSTRT_CNTR
1041                      : closest_timer(&adapter->sge, us));
1042
1043         /*
1044          * Update the response queue's interrupt coalescing parameters and
1045          * return success.
1046          */
1047         rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1048                              (cnt > 0 ? QINTR_CNT_EN : 0));
1049         return 0;
1050 }
1051
/*
 * Build the version number used to identify the type of adapter:
 * - bits 0..9: chip version
 * - bits 10..15: chip revision
 *
 * The value is fixed for this driver: chip version 4, revision 0x3f
 * (cxgb4vf).  The @adapter argument is not consulted.
 */
static inline unsigned int mk_adap_vers(const struct adapter *adapter)
{
	const unsigned int chip_version = 4;
	const unsigned int chip_revision = 0x3f;

	return chip_version | (chip_revision << 10);
}
1064
/*
 * Execute the specified ioctl command.  No commands are implemented, so
 * every request is answered with -EOPNOTSUPP.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	/*
	 * The VF Driver doesn't have access to any of the other common
	 * Ethernet device ioctl()'s (like reading/writing PHY registers,
	 * etc.), so there are no cases to handle here.
	 */
	default:
		return -EOPNOTSUPP;
	}
}
1085
1086 /*
1087  * Change the device's MTU.
1088  */
1089 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1090 {
1091         int ret;
1092         struct port_info *pi = netdev_priv(dev);
1093
1094         /* accommodate SACK */
1095         if (new_mtu < 81)
1096                 return -EINVAL;
1097
1098         ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1099                               -1, -1, -1, -1, true);
1100         if (!ret)
1101                 dev->mtu = new_mtu;
1102         return ret;
1103 }
1104
1105 /*
1106  * Change the devices MAC address.
1107  */
1108 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1109 {
1110         int ret;
1111         struct sockaddr *addr = _addr;
1112         struct port_info *pi = netdev_priv(dev);
1113
1114         if (!is_valid_ether_addr(addr->sa_data))
1115                 return -EINVAL;
1116
1117         ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1118                               addr->sa_data, true);
1119         if (ret < 0)
1120                 return ret;
1121
1122         memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1123         pi->xact_addr_filt = ret;
1124         return 0;
1125 }
1126
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll all of our receive queues.  This is called outside of normal interrupt
 * context.  With MSI-X each of the port's Queue Sets has its response queue
 * serviced directly; otherwise the adapter's interrupt handler is invoked.
 */
static void cxgb4vf_poll_controller(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	struct sge_eth_rxq *rxq;
	int remaining;

	if (!(adapter->flags & USING_MSIX)) {
		t4vf_intr_handler(adapter)(0, adapter);
		return;
	}

	rxq = &adapter->sge.ethrxq[pi->first_qset];
	remaining = pi->nqsets;
	while (remaining--) {
		t4vf_sge_intr_msix(0, &rxq->rspq);
		rxq++;
	}
}
#endif
1150
1151 /*
1152  * Ethtool operations.
1153  * ===================
1154  *
1155  * Note that we don't support any ethtool operations which change the physical
1156  * state of the port to which we're linked.
1157  */
1158
1159 /*
1160  * Return current port link settings.
1161  */
1162 static int cxgb4vf_get_settings(struct net_device *dev,
1163                                 struct ethtool_cmd *cmd)
1164 {
1165         const struct port_info *pi = netdev_priv(dev);
1166
1167         cmd->supported = pi->link_cfg.supported;
1168         cmd->advertising = pi->link_cfg.advertising;
1169         ethtool_cmd_speed_set(cmd,
1170                               netif_carrier_ok(dev) ? pi->link_cfg.speed : -1);
1171         cmd->duplex = DUPLEX_FULL;
1172
1173         cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1174         cmd->phy_address = pi->port_id;
1175         cmd->transceiver = XCVR_EXTERNAL;
1176         cmd->autoneg = pi->link_cfg.autoneg;
1177         cmd->maxtxpkt = 0;
1178         cmd->maxrxpkt = 0;
1179         return 0;
1180 }
1181
1182 /*
1183  * Return our driver information.
1184  */
1185 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1186                                 struct ethtool_drvinfo *drvinfo)
1187 {
1188         struct adapter *adapter = netdev2adap(dev);
1189
1190         strcpy(drvinfo->driver, KBUILD_MODNAME);
1191         strcpy(drvinfo->version, DRV_VERSION);
1192         strcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)));
1193         snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1194                  "%u.%u.%u.%u, TP %u.%u.%u.%u",
1195                  FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1196                  FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1197                  FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1198                  FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1199                  FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1200                  FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1201                  FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1202                  FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1203 }
1204
1205 /*
1206  * Return current adapter message level.
1207  */
1208 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1209 {
1210         return netdev2adap(dev)->msg_enable;
1211 }
1212
1213 /*
1214  * Set current adapter message level.
1215  */
1216 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1217 {
1218         netdev2adap(dev)->msg_enable = msglevel;
1219 }
1220
1221 /*
1222  * Return the device's current Queue Set ring size parameters along with the
1223  * allowed maximum values.  Since ethtool doesn't understand the concept of
1224  * multi-queue devices, we just return the current values associated with the
1225  * first Queue Set.
1226  */
1227 static void cxgb4vf_get_ringparam(struct net_device *dev,
1228                                   struct ethtool_ringparam *rp)
1229 {
1230         const struct port_info *pi = netdev_priv(dev);
1231         const struct sge *s = &pi->adapter->sge;
1232
1233         rp->rx_max_pending = MAX_RX_BUFFERS;
1234         rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1235         rp->rx_jumbo_max_pending = 0;
1236         rp->tx_max_pending = MAX_TXQ_ENTRIES;
1237
1238         rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1239         rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1240         rp->rx_jumbo_pending = 0;
1241         rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1242 }
1243
1244 /*
1245  * Set the Queue Set ring size parameters for the device.  Again, since
1246  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1247  * apply these new values across all of the Queue Sets associated with the
1248  * device -- after vetting them of course!
1249  */
1250 static int cxgb4vf_set_ringparam(struct net_device *dev,
1251                                  struct ethtool_ringparam *rp)
1252 {
1253         const struct port_info *pi = netdev_priv(dev);
1254         struct adapter *adapter = pi->adapter;
1255         struct sge *s = &adapter->sge;
1256         int qs;
1257
1258         if (rp->rx_pending > MAX_RX_BUFFERS ||
1259             rp->rx_jumbo_pending ||
1260             rp->tx_pending > MAX_TXQ_ENTRIES ||
1261             rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1262             rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1263             rp->rx_pending < MIN_FL_ENTRIES ||
1264             rp->tx_pending < MIN_TXQ_ENTRIES)
1265                 return -EINVAL;
1266
1267         if (adapter->flags & FULL_INIT_DONE)
1268                 return -EBUSY;
1269
1270         for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1271                 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1272                 s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1273                 s->ethtxq[qs].q.size = rp->tx_pending;
1274         }
1275         return 0;
1276 }
1277
1278 /*
1279  * Return the interrupt holdoff timer and count for the first Queue Set on the
1280  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1281  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1282  */
1283 static int cxgb4vf_get_coalesce(struct net_device *dev,
1284                                 struct ethtool_coalesce *coalesce)
1285 {
1286         const struct port_info *pi = netdev_priv(dev);
1287         const struct adapter *adapter = pi->adapter;
1288         const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1289
1290         coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1291         coalesce->rx_max_coalesced_frames =
1292                 ((rspq->intr_params & QINTR_CNT_EN)
1293                  ? adapter->sge.counter_val[rspq->pktcnt_idx]
1294                  : 0);
1295         return 0;
1296 }
1297
1298 /*
1299  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1300  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1301  * the interrupt holdoff timer on any of the device's Queue Sets.
1302  */
1303 static int cxgb4vf_set_coalesce(struct net_device *dev,
1304                                 struct ethtool_coalesce *coalesce)
1305 {
1306         const struct port_info *pi = netdev_priv(dev);
1307         struct adapter *adapter = pi->adapter;
1308
1309         return set_rxq_intr_params(adapter,
1310                                    &adapter->sge.ethrxq[pi->first_qset].rspq,
1311                                    coalesce->rx_coalesce_usecs,
1312                                    coalesce->rx_max_coalesced_frames);
1313 }
1314
1315 /*
1316  * Report current port link pause parameter settings.
1317  */
1318 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1319                                    struct ethtool_pauseparam *pauseparam)
1320 {
1321         struct port_info *pi = netdev_priv(dev);
1322
1323         pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1324         pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1325         pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1326 }
1327
1328 /*
1329  * Identify the port by blinking the port's LED.
1330  */
1331 static int cxgb4vf_phys_id(struct net_device *dev,
1332                            enum ethtool_phys_id_state state)
1333 {
1334         unsigned int val;
1335         struct port_info *pi = netdev_priv(dev);
1336
1337         if (state == ETHTOOL_ID_ACTIVE)
1338                 val = 0xffff;
1339         else if (state == ETHTOOL_ID_INACTIVE)
1340                 val = 0;
1341         else
1342                 return -EINVAL;
1343
1344         return t4vf_identify_port(pi->adapter, pi->viid, val);
1345 }
1346
/*
 * Port stats maintained per queue of the port.  Each field is the sum of
 * the corresponding per-queue counter over all of a port's Queue Sets (see
 * collect_sge_port_stats()).  The structure is filled in place inside the
 * u64 array handed to "ethtool -S", so it must consist solely of u64 fields
 * whose order matches the per-queue entries of stats_strings[].
 */
struct queue_port_stats {
	u64 tso;		/* sum of the TX queues' tso counters */
	u64 tx_csum;		/* sum of the TX queues' tx_cso counters */
	u64 rx_csum;		/* sum of the RX queues' rx_cso counters */
	u64 vlan_ex;		/* sum of the RX queues' vlan_ex counters */
	u64 vlan_ins;		/* sum of the TX queues' vlan_ins counters */
	u64 lro_pkts;		/* sum of the RX queues' lro_pkts counters */
	u64 lro_merged;		/* sum of the RX queues' lro_merged counters */
};
1359
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 *
 * cxgb4vf_get_ethtool_stats() fills its output array with a
 * struct t4vf_port_stats immediately followed by a struct queue_port_stats,
 * so the two groups of names below must stay in that order and each group
 * must list one name per u64 field of its structure.
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
	/*
	 * These must match the layout of the t4vf_port_stats structure.
	 */
	"TxBroadcastBytes  ",
	"TxBroadcastFrames ",
	"TxMulticastBytes  ",
	"TxMulticastFrames ",
	"TxUnicastBytes    ",
	"TxUnicastFrames   ",
	"TxDroppedFrames   ",
	"TxOffloadBytes    ",
	"TxOffloadFrames   ",
	"RxBroadcastBytes  ",
	"RxBroadcastFrames ",
	"RxMulticastBytes  ",
	"RxMulticastFrames ",
	"RxUnicastBytes    ",
	"RxUnicastFrames   ",
	"RxErrorFrames     ",

	/*
	 * These are accumulated per-queue statistics and must match the
	 * order of the fields in the queue_port_stats structure.
	 */
	"TSO               ",
	"TxCsumOffload     ",
	"RxCsumGood        ",
	"VLANextractions   ",
	"VLANinsertions    ",
	"GROPackets        ",
	"GROMerged         ",
};
1398
1399 /*
1400  * Return the number of statistics in the specified statistics set.
1401  */
1402 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1403 {
1404         switch (sset) {
1405         case ETH_SS_STATS:
1406                 return ARRAY_SIZE(stats_strings);
1407         default:
1408                 return -EOPNOTSUPP;
1409         }
1410         /*NOTREACHED*/
1411 }
1412
1413 /*
1414  * Return the strings for the specified statistics set.
1415  */
1416 static void cxgb4vf_get_strings(struct net_device *dev,
1417                                 u32 sset,
1418                                 u8 *data)
1419 {
1420         switch (sset) {
1421         case ETH_SS_STATS:
1422                 memcpy(data, stats_strings, sizeof(stats_strings));
1423                 break;
1424         }
1425 }
1426
1427 /*
1428  * Small utility routine to accumulate queue statistics across the queues of
1429  * a "port".
1430  */
1431 static void collect_sge_port_stats(const struct adapter *adapter,
1432                                    const struct port_info *pi,
1433                                    struct queue_port_stats *stats)
1434 {
1435         const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1436         const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1437         int qs;
1438
1439         memset(stats, 0, sizeof(*stats));
1440         for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1441                 stats->tso += txq->tso;
1442                 stats->tx_csum += txq->tx_cso;
1443                 stats->rx_csum += rxq->stats.rx_cso;
1444                 stats->vlan_ex += rxq->stats.vlan_ex;
1445                 stats->vlan_ins += txq->vlan_ins;
1446                 stats->lro_pkts += rxq->stats.lro_pkts;
1447                 stats->lro_merged += rxq->stats.lro_merged;
1448         }
1449 }
1450
1451 /*
1452  * Return the ETH_SS_STATS statistics set.
1453  */
1454 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1455                                       struct ethtool_stats *stats,
1456                                       u64 *data)
1457 {
1458         struct port_info *pi = netdev2pinfo(dev);
1459         struct adapter *adapter = pi->adapter;
1460         int err = t4vf_get_port_stats(adapter, pi->pidx,
1461                                       (struct t4vf_port_stats *)data);
1462         if (err)
1463                 memset(data, 0, sizeof(struct t4vf_port_stats));
1464
1465         data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1466         collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1467 }
1468
1469 /*
1470  * Return the size of our register map.
1471  */
1472 static int cxgb4vf_get_regs_len(struct net_device *dev)
1473 {
1474         return T4VF_REGMAP_SIZE;
1475 }
1476
1477 /*
1478  * Dump a block of registers, start to end inclusive, into a buffer.
1479  */
1480 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1481                            unsigned int start, unsigned int end)
1482 {
1483         u32 *bp = regbuf + start - T4VF_REGMAP_START;
1484
1485         for ( ; start <= end; start += sizeof(u32)) {
1486                 /*
1487                  * Avoid reading the Mailbox Control register since that
1488                  * can trigger a Mailbox Ownership Arbitration cycle and
1489                  * interfere with communication with the firmware.
1490                  */
1491                 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1492                         *bp++ = 0xffff;
1493                 else
1494                         *bp++ = t4_read_reg(adapter, start);
1495         }
1496 }
1497
1498 /*
1499  * Copy our entire register map into the provided buffer.
1500  */
1501 static void cxgb4vf_get_regs(struct net_device *dev,
1502                              struct ethtool_regs *regs,
1503                              void *regbuf)
1504 {
1505         struct adapter *adapter = netdev2adap(dev);
1506
1507         regs->version = mk_adap_vers(adapter);
1508
1509         /*
1510          * Fill in register buffer with our register map.
1511          */
1512         memset(regbuf, 0, T4VF_REGMAP_SIZE);
1513
1514         reg_block_dump(adapter, regbuf,
1515                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1516                        T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1517         reg_block_dump(adapter, regbuf,
1518                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1519                        T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1520         reg_block_dump(adapter, regbuf,
1521                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1522                        T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1523         reg_block_dump(adapter, regbuf,
1524                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1525                        T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1526
1527         reg_block_dump(adapter, regbuf,
1528                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1529                        T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1530 }
1531
1532 /*
1533  * Report current Wake On LAN settings.
1534  */
1535 static void cxgb4vf_get_wol(struct net_device *dev,
1536                             struct ethtool_wolinfo *wol)
1537 {
1538         wol->supported = 0;
1539         wol->wolopts = 0;
1540         memset(&wol->sopass, 0, sizeof(wol->sopass));
1541 }
1542
/*
 * TCP Segmentation Offload flags which we support: the NETIF_F_TSO,
 * NETIF_F_TSO6 and NETIF_F_TSO_ECN netdev feature bits OR'ed together.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1547
1548 static struct ethtool_ops cxgb4vf_ethtool_ops = {
1549         .get_settings           = cxgb4vf_get_settings,
1550         .get_drvinfo            = cxgb4vf_get_drvinfo,
1551         .get_msglevel           = cxgb4vf_get_msglevel,
1552         .set_msglevel           = cxgb4vf_set_msglevel,
1553         .get_ringparam          = cxgb4vf_get_ringparam,
1554         .set_ringparam          = cxgb4vf_set_ringparam,
1555         .get_coalesce           = cxgb4vf_get_coalesce,
1556         .set_coalesce           = cxgb4vf_set_coalesce,
1557         .get_pauseparam         = cxgb4vf_get_pauseparam,
1558         .get_link               = ethtool_op_get_link,
1559         .get_strings            = cxgb4vf_get_strings,
1560         .set_phys_id            = cxgb4vf_phys_id,
1561         .get_sset_count         = cxgb4vf_get_sset_count,
1562         .get_ethtool_stats      = cxgb4vf_get_ethtool_stats,
1563         .get_regs_len           = cxgb4vf_get_regs_len,
1564         .get_regs               = cxgb4vf_get_regs,
1565         .get_wol                = cxgb4vf_get_wol,
1566 };
1567
1568 /*
1569  * /sys/kernel/debug/cxgb4vf support code and data.
1570  * ================================================
1571  */
1572
/*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
 */
#define QPL	4

/*
 * seq_file "show" routine for the SGE queue information file.  @v encodes
 * the entry to display as (entry index + 1) -- presumably the cookie handed
 * out by sge_queue_start().  The first DIV_ROUND_UP(ethqsets, QPL) entries
 * each describe up to QPL Ethernet Queue Sets side by side; the entry after
 * those describes the firmware event queue and the one after that the
 * interrupt queue.  Always returns 0.
 */
static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	int qs, r = (uintptr_t)v - 1;

	/* separate every entry but the first from its predecessor */
	if (r)
		seq_putc(seq, '\n');

	/*
	 * Output helpers.  S3() prints one row: a label followed by one
	 * column per Queue Set of the current entry.  It relies on the
	 * locals "qs" (column index) and "n" (number of columns) and
	 * evaluates "v" once per column.  S() prints string columns, T() and
	 * R() print an unsigned field of the qs'th TX or RX queue.
	 */
	#define S3(fmt_spec, s, v) \
		do {\
			seq_printf(seq, "%-12s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %16" fmt_spec, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)
	#define T(s, v)		S3("u", s, txq[qs].v)
	#define R(s, v)		S3("u", s, rxq[qs].v)

	if (r < eth_entries) {
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		/* the last Ethernet entry may hold fewer than QPL Queue Sets */
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		S3("d", "Port:",
		   (rxq[qs].rspq.netdev
		    ? ((struct port_info *)
		       netdev_priv(rxq[qs].rspq.netdev))->port_id
		    : -1));
		T("TxQ ID:", q.abs_id);
		T("TxQ size:", q.size);
		T("TxQ inuse:", q.in_use);
		T("TxQ PIdx:", q.pidx);
		T("TxQ CIdx:", q.cidx);
		R("RspQ ID:", rspq.abs_id);
		R("RspQ size:", rspq.size);
		R("RspQE size:", rspq.iqe_len);
		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
		S3("u", "Intr pktcnt:",
		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
		R("RspQ CIdx:", rspq.cidx);
		R("RspQ Gen:", rspq.gen);
		R("FL ID:", fl.abs_id);
		R("FL size:", fl.size - MIN_FL_RESID);
		R("FL avail:", fl.avail);
		R("FL PIdx:", fl.pidx);
		R("FL CIdx:", fl.cidx);
		return 0;
	}

	/* renumber so that 0 is the first entry after the Ethernet ones */
	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, evtq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[evtq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, intrq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[intrq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
	}

	/*
	 * The helper macros are dropped here; the early return in the
	 * Ethernet branch does not matter since #undef is purely textual.
	 */
	#undef R
	#undef T
	#undef S
	#undef S3

	return 0;
}
1666
1667 /*
1668  * Return the number of "entries" in our "file".  We group the multi-Queue
1669  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1670  *
1671  *     Ethernet RX/TX Queue Sets
1672  *     Firmware Event Queue
1673  *     Forwarded Interrupt Queue (if in MSI mode)
1674  */
1675 static int sge_queue_entries(const struct adapter *adapter)
1676 {
1677         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1678                 ((adapter->flags & USING_MSI) != 0);
1679 }
1680
1681 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1682 {
1683         int entries = sge_queue_entries(seq->private);
1684
1685         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1686 }
1687
/*
 * seq_file ->stop() for the SGE Queue Info node.  Nothing is done here; the
 * start/next routines only compute integer tokens.
 */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
}
1691
1692 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1693 {
1694         int entries = sge_queue_entries(seq->private);
1695
1696         ++*pos;
1697         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1698 }
1699
/*
 * seq_file iterator callbacks used by sge_qinfo_open(): positions are walked
 * by sge_queue_start()/sge_queue_next() and rendered by sge_qinfo_show().
 */
static const struct seq_operations sge_qinfo_seq_ops = {
        .start = sge_queue_start,
        .next  = sge_queue_next,
        .stop  = sge_queue_stop,
        .show  = sge_qinfo_show
};
1706
1707 static int sge_qinfo_open(struct inode *inode, struct file *file)
1708 {
1709         int res = seq_open(file, &sge_qinfo_seq_ops);
1710
1711         if (!res) {
1712                 struct seq_file *seq = file->private_data;
1713                 seq->private = inode->i_private;
1714         }
1715         return res;
1716 }
1717
/*
 * File operations for the "sge_qinfo" debugfs node: a read-only seq_file
 * opened through sge_qinfo_open().
 */
static const struct file_operations sge_qinfo_debugfs_fops = {
        .owner   = THIS_MODULE,
        .open    = sge_qinfo_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
1725
/*
 * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
 */
#define QPL     4

/*
 * seq_file ->show() for the SGE Queue Statistics node.
 *
 * @v is the iterator token produced by the start/next routines: the entry
 * index plus one.  Entries below eth_entries each cover up to QPL Ethernet
 * Queue Sets printed side by side; the next entry is the Firmware Event
 * Queue and the one after that is the Forwarded Interrupt Queue.
 *
 * Always returns 0.
 */
static int sge_qstats_show(struct seq_file *seq, void *v)
{
        struct adapter *adapter = seq->private;
        int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
        int qs, r = (uintptr_t)v - 1;

        /* separate every entry after the first with a blank line */
        if (r)
                seq_putc(seq, '\n');

        /*
         * Row-printing helpers.  S3 emits a 16-column left-justified label
         * followed by one 8-column value per Queue Set, evaluating its value
         * expression once for each qs in [0, n).  The macros rely on the
         * locals "qs", "n", "txq" and "rxq" being in scope at the point of
         * use.  T*/R* variants index the TX/RX Queue Set arrays by qs.
         */
        #define S3(fmt, s, v) \
                do { \
                        seq_printf(seq, "%-16s", s); \
                        for (qs = 0; qs < n; ++qs) \
                                seq_printf(seq, " %8" fmt, v); \
                        seq_putc(seq, '\n'); \
                } while (0)
        #define S(s, v)         S3("s", s, v)

        #define T3(fmt, s, v)   S3(fmt, s, txq[qs].v)
        #define T(s, v)         T3("lu", s, v)

        #define R3(fmt, s, v)   S3(fmt, s, rxq[qs].v)
        #define R(s, v)         R3("lu", s, v)

        if (r < eth_entries) {
                /* first Queue Set of this entry and how many it covers */
                const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
                const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
                int n = min(QPL, adapter->sge.ethqsets - QPL * r);

                S("QType:", "Ethernet");
                S("Interface:",
                  (rxq[qs].rspq.netdev
                   ? rxq[qs].rspq.netdev->name
                   : "N/A"));
                R3("u", "RspQNullInts:", rspq.unhandled_irqs);
                R("RxPackets:", stats.pkts);
                R("RxCSO:", stats.rx_cso);
                R("VLANxtract:", stats.vlan_ex);
                R("LROmerged:", stats.lro_merged);
                R("LROpackets:", stats.lro_pkts);
                R("RxDrops:", stats.rx_drops);
                T("TSO:", tso);
                T("TxCSO:", tx_cso);
                T("VLANins:", vlan_ins);
                T("TxQFull:", q.stops);
                T("TxQRestarts:", q.restarts);
                T("TxMapErr:", mapping_err);
                R("FLAllocErr:", fl.alloc_failed);
                R("FLLrgAlcErr:", fl.large_alloc_failed);
                R("FLStarving:", fl.starving);
                /*
                 * Early return; the #undef directives below are preprocessor
                 * directives and still take effect for the rest of the file.
                 */
                return 0;
        }

        /* rebase r so 0 is the FW event queue and 1 the interrupt queue */
        r -= eth_entries;
        if (r == 0) {
                const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

                seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
                           evtq->unhandled_irqs);
                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
        } else if (r == 1) {
                const struct sge_rspq *intrq = &adapter->sge.intrq;

                seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
                seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
                           intrq->unhandled_irqs);
                seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
                seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
        }

        #undef R
        #undef T
        #undef S
        #undef R3
        #undef T3
        #undef S3

        return 0;
}
1812
1813 /*
1814  * Return the number of "entries" in our "file".  We group the multi-Queue
1815  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1816  *
1817  *     Ethernet RX/TX Queue Sets
1818  *     Firmware Event Queue
1819  *     Forwarded Interrupt Queue (if in MSI mode)
1820  */
1821 static int sge_qstats_entries(const struct adapter *adapter)
1822 {
1823         return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1824                 ((adapter->flags & USING_MSI) != 0);
1825 }
1826
1827 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1828 {
1829         int entries = sge_qstats_entries(seq->private);
1830
1831         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1832 }
1833
/*
 * seq_file ->stop() for the SGE Queue Statistics node.  Intentionally empty:
 * the start/next routines only compute integer tokens.
 */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}
1837
1838 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1839 {
1840         int entries = sge_qstats_entries(seq->private);
1841
1842         (*pos)++;
1843         return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1844 }
1845
/*
 * seq_file iterator callbacks used by sge_qstats_open(); each entry is
 * rendered by sge_qstats_show().
 */
static const struct seq_operations sge_qstats_seq_ops = {
        .start = sge_qstats_start,
        .next  = sge_qstats_next,
        .stop  = sge_qstats_stop,
        .show  = sge_qstats_show
};
1852
1853 static int sge_qstats_open(struct inode *inode, struct file *file)
1854 {
1855         int res = seq_open(file, &sge_qstats_seq_ops);
1856
1857         if (res == 0) {
1858                 struct seq_file *seq = file->private_data;
1859                 seq->private = inode->i_private;
1860         }
1861         return res;
1862 }
1863
/*
 * File operations for the "sge_qstats" debugfs node (see debugfs_files[]):
 * a read-only seq_file opened through sge_qstats_open().
 */
static const struct file_operations sge_qstats_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = sge_qstats_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
1871
1872 /*
1873  * Show PCI-E SR-IOV Virtual Function Resource Limits.
1874  */
1875 static int resources_show(struct seq_file *seq, void *v)
1876 {
1877         struct adapter *adapter = seq->private;
1878         struct vf_resources *vfres = &adapter->params.vfres;
1879
1880         #define S(desc, fmt, var) \
1881                 seq_printf(seq, "%-60s " fmt "\n", \
1882                            desc " (" #var "):", vfres->var)
1883
1884         S("Virtual Interfaces", "%d", nvi);
1885         S("Egress Queues", "%d", neq);
1886         S("Ethernet Control", "%d", nethctrl);
1887         S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1888         S("Ingress Queues", "%d", niq);
1889         S("Traffic Class", "%d", tc);
1890         S("Port Access Rights Mask", "%#x", pmask);
1891         S("MAC Address Filters", "%d", nexactf);
1892         S("Firmware Command Read Capabilities", "%#x", r_caps);
1893         S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1894
1895         #undef S
1896
1897         return 0;
1898 }
1899
1900 static int resources_open(struct inode *inode, struct file *file)
1901 {
1902         return single_open(file, resources_show, inode->i_private);
1903 }
1904
/*
 * File operations for the "resources" debugfs node (see debugfs_files[]).
 * Opened with single_open(), hence released with single_release().
 */
static const struct file_operations resources_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = resources_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
};
1912
1913 /*
1914  * Show Virtual Interfaces.
1915  */
1916 static int interfaces_show(struct seq_file *seq, void *v)
1917 {
1918         if (v == SEQ_START_TOKEN) {
1919                 seq_puts(seq, "Interface  Port   VIID\n");
1920         } else {
1921                 struct adapter *adapter = seq->private;
1922                 int pidx = (uintptr_t)v - 2;
1923                 struct net_device *dev = adapter->port[pidx];
1924                 struct port_info *pi = netdev_priv(dev);
1925
1926                 seq_printf(seq, "%9s  %4d  %#5x\n",
1927                            dev->name, pi->port_id, pi->viid);
1928         }
1929         return 0;
1930 }
1931
1932 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1933 {
1934         return pos <= adapter->params.nports
1935                 ? (void *)(uintptr_t)(pos + 1)
1936                 : NULL;
1937 }
1938
1939 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1940 {
1941         return *pos
1942                 ? interfaces_get_idx(seq->private, *pos)
1943                 : SEQ_START_TOKEN;
1944 }
1945
1946 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1947 {
1948         (*pos)++;
1949         return interfaces_get_idx(seq->private, *pos);
1950 }
1951
/*
 * seq_file ->stop() for the "interfaces" node.  Intentionally empty: the
 * start/next routines only compute integer tokens.
 */
static void interfaces_stop(struct seq_file *seq, void *v)
{
}
1955
/*
 * seq_file iterator callbacks used by interfaces_open(); each token is
 * rendered by interfaces_show().
 */
static const struct seq_operations interfaces_seq_ops = {
        .start = interfaces_start,
        .next  = interfaces_next,
        .stop  = interfaces_stop,
        .show  = interfaces_show
};
1962
1963 static int interfaces_open(struct inode *inode, struct file *file)
1964 {
1965         int res = seq_open(file, &interfaces_seq_ops);
1966
1967         if (res == 0) {
1968                 struct seq_file *seq = file->private_data;
1969                 seq->private = inode->i_private;
1970         }
1971         return res;
1972 }
1973
/*
 * File operations for the "interfaces" debugfs node (see debugfs_files[]):
 * a read-only seq_file opened through interfaces_open().
 */
static const struct file_operations interfaces_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = interfaces_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
1981
/*
 * /sys/kernel/debug/cxgb4vf/ files list.
 *
 * Each entry describes one debugfs node: its name, its file mode and the
 * file operations that implement it.
 */
struct cxgb4vf_debugfs_entry {
        const char *name;               /* name of debugfs node */
        mode_t mode;                    /* file system mode */
        const struct file_operations *fops;     /* node's file operations */
};
1990
1991 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
1992         { "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
1993         { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
1994         { "resources",  S_IRUGO, &resources_proc_fops },
1995         { "interfaces", S_IRUGO, &interfaces_proc_fops },
1996 };
1997
1998 /*
1999  * Module and device initialization and cleanup code.
2000  * ==================================================
2001  */
2002
2003 /*
2004  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2005  * directory (debugfs_root) has already been set up.
2006  */
2007 static int __devinit setup_debugfs(struct adapter *adapter)
2008 {
2009         int i;
2010
2011         BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2012
2013         /*
2014          * Debugfs support is best effort.
2015          */
2016         for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2017                 (void)debugfs_create_file(debugfs_files[i].name,
2018                                   debugfs_files[i].mode,
2019                                   adapter->debugfs_root,
2020                                   (void *)adapter,
2021                                   debugfs_files[i].fops);
2022
2023         return 0;
2024 }
2025
/*
 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
 * it to our caller to tear down the directory (debugfs_root).
 *
 * Apart from asserting that debugfs_root is a valid pointer, this routine
 * currently does nothing.
 */
static void cleanup_debugfs(struct adapter *adapter)
{
        BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

        /*
         * Unlike our sister routine cleanup_proc(), we don't need to remove
         * individual entries because a call will be made to
         * debugfs_remove_recursive().  We just need to clean up any ancillary
         * persistent state.
         */
        /* nothing to do */
}
2042
2043 /*
2044  * Perform early "adapter" initialization.  This is where we discover what
2045  * adapter parameters we're going to be using and initialize basic adapter
2046  * hardware support.
2047  */
2048 static int __devinit adap_init0(struct adapter *adapter)
2049 {
2050         struct vf_resources *vfres = &adapter->params.vfres;
2051         struct sge_params *sge_params = &adapter->params.sge;
2052         struct sge *s = &adapter->sge;
2053         unsigned int ethqsets;
2054         int err;
2055
2056         /*
2057          * Wait for the device to become ready before proceeding ...
2058          */
2059         err = t4vf_wait_dev_ready(adapter);
2060         if (err) {
2061                 dev_err(adapter->pdev_dev, "device didn't become ready:"
2062                         " err=%d\n", err);
2063                 return err;
2064         }
2065
2066         /*
2067          * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2068          * 2.6.31 and later we can't call pci_reset_function() in order to
2069          * issue an FLR because of a self- deadlock on the device semaphore.
2070          * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2071          * cases where they're needed -- for instance, some versions of KVM
2072          * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2073          * use the firmware based reset in order to reset any per function
2074          * state.
2075          */
2076         err = t4vf_fw_reset(adapter);
2077         if (err < 0) {
2078                 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2079                 return err;
2080         }
2081
2082         /*
2083          * Grab basic operational parameters.  These will predominantly have
2084          * been set up by the Physical Function Driver or will be hard coded
2085          * into the adapter.  We just have to live with them ...  Note that
2086          * we _must_ get our VPD parameters before our SGE parameters because
2087          * we need to know the adapter's core clock from the VPD in order to
2088          * properly decode the SGE Timer Values.
2089          */
2090         err = t4vf_get_dev_params(adapter);
2091         if (err) {
2092                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2093                         " device parameters: err=%d\n", err);
2094                 return err;
2095         }
2096         err = t4vf_get_vpd_params(adapter);
2097         if (err) {
2098                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2099                         " VPD parameters: err=%d\n", err);
2100                 return err;
2101         }
2102         err = t4vf_get_sge_params(adapter);
2103         if (err) {
2104                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2105                         " SGE parameters: err=%d\n", err);
2106                 return err;
2107         }
2108         err = t4vf_get_rss_glb_config(adapter);
2109         if (err) {
2110                 dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2111                         " RSS parameters: err=%d\n", err);
2112                 return err;
2113         }
2114         if (adapter->params.rss.mode !=
2115             FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2116                 dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2117                         " mode %d\n", adapter->params.rss.mode);
2118                 return -EINVAL;
2119         }
2120         err = t4vf_sge_init(adapter);
2121         if (err) {
2122                 dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2123                         " err=%d\n", err);
2124                 return err;
2125         }
2126
2127         /*
2128          * Retrieve our RX interrupt holdoff timer values and counter
2129          * threshold values from the SGE parameters.
2130          */
2131         s->timer_val[0] = core_ticks_to_us(adapter,
2132                 TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2133         s->timer_val[1] = core_ticks_to_us(adapter,
2134                 TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2135         s->timer_val[2] = core_ticks_to_us(adapter,
2136                 TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2137         s->timer_val[3] = core_ticks_to_us(adapter,
2138                 TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2139         s->timer_val[4] = core_ticks_to_us(adapter,
2140                 TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2141         s->timer_val[5] = core_ticks_to_us(adapter,
2142                 TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2143
2144         s->counter_val[0] =
2145                 THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2146         s->counter_val[1] =
2147                 THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2148         s->counter_val[2] =
2149                 THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2150         s->counter_val[3] =
2151                 THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2152
2153         /*
2154          * Grab our Virtual Interface resource allocation, extract the
2155          * features that we're interested in and do a bit of sanity testing on
2156          * what we discover.
2157          */
2158         err = t4vf_get_vfres(adapter);
2159         if (err) {
2160                 dev_err(adapter->pdev_dev, "unable to get virtual interface"
2161                         " resources: err=%d\n", err);
2162                 return err;
2163         }
2164
2165         /*
2166          * The number of "ports" which we support is equal to the number of
2167          * Virtual Interfaces with which we've been provisioned.
2168          */
2169         adapter->params.nports = vfres->nvi;
2170         if (adapter->params.nports > MAX_NPORTS) {
2171                 dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2172                          " virtual interfaces\n", MAX_NPORTS,
2173                          adapter->params.nports);
2174                 adapter->params.nports = MAX_NPORTS;
2175         }
2176
2177         /*
2178          * We need to reserve a number of the ingress queues with Free List
2179          * and Interrupt capabilities for special interrupt purposes (like
2180          * asynchronous firmware messages, or forwarded interrupts if we're
2181          * using MSI).  The rest of the FL/Intr-capable ingress queues will be
2182          * matched up one-for-one with Ethernet/Control egress queues in order
2183          * to form "Queue Sets" which will be aportioned between the "ports".
2184          * For each Queue Set, we'll need the ability to allocate two Egress
2185          * Contexts -- one for the Ingress Queue Free List and one for the TX
2186          * Ethernet Queue.
2187          */
2188         ethqsets = vfres->niqflint - INGQ_EXTRAS;
2189         if (vfres->nethctrl != ethqsets) {
2190                 dev_warn(adapter->pdev_dev, "unequal number of [available]"
2191                          " ingress/egress queues (%d/%d); using minimum for"
2192                          " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2193                 ethqsets = min(vfres->nethctrl, ethqsets);
2194         }
2195         if (vfres->neq < ethqsets*2) {
2196                 dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2197                          " to support Queue Sets (%d); reducing allowed Queue"
2198                          " Sets\n", vfres->neq, ethqsets);
2199                 ethqsets = vfres->neq/2;
2200         }
2201         if (ethqsets > MAX_ETH_QSETS) {
2202                 dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2203                          " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2204                 ethqsets = MAX_ETH_QSETS;
2205         }
2206         if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2207                 dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2208                          " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2209         }
2210         adapter->sge.max_ethqsets = ethqsets;
2211
2212         /*
2213          * Check for various parameter sanity issues.  Most checks simply
2214          * result in us using fewer resources than our provissioning but we
2215          * do need at least  one "port" with which to work ...
2216          */
2217         if (adapter->sge.max_ethqsets < adapter->params.nports) {
2218                 dev_warn(adapter->pdev_dev, "only using %d of %d available"
2219                          " virtual interfaces (too few Queue Sets)\n",
2220                          adapter->sge.max_ethqsets, adapter->params.nports);
2221                 adapter->params.nports = adapter->sge.max_ethqsets;
2222         }
2223         if (adapter->params.nports == 0) {
2224                 dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2225                         "usable!\n");
2226                 return -EINVAL;
2227         }
2228         return 0;
2229 }
2230
2231 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2232                              u8 pkt_cnt_idx, unsigned int size,
2233                              unsigned int iqe_size)
2234 {
2235         rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2236                              (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2237         rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2238                             ? pkt_cnt_idx
2239                             : 0);
2240         rspq->iqe_len = iqe_size;
2241         rspq->size = size;
2242 }
2243
2244 /*
2245  * Perform default configuration of DMA queues depending on the number and
2246  * type of ports we found and the number of available CPUs.  Most settings can
2247  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2248  * being brought up for the first time.
2249  */
2250 static void __devinit cfg_queues(struct adapter *adapter)
2251 {
2252         struct sge *s = &adapter->sge;
2253         int q10g, n10g, qidx, pidx, qs;
2254         size_t iqe_size;
2255
2256         /*
2257          * We should not be called till we know how many Queue Sets we can
2258          * support.  In particular, this means that we need to know what kind
2259          * of interrupts we'll be using ...
2260          */
2261         BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2262
2263         /*
2264          * Count the number of 10GbE Virtual Interfaces that we have.
2265          */
2266         n10g = 0;
2267         for_each_port(adapter, pidx)
2268                 n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2269
2270         /*
2271          * We default to 1 queue per non-10G port and up to # of cores queues
2272          * per 10G port.
2273          */
2274         if (n10g == 0)
2275                 q10g = 0;
2276         else {
2277                 int n1g = (adapter->params.nports - n10g);
2278                 q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2279                 if (q10g > num_online_cpus())
2280                         q10g = num_online_cpus();
2281         }
2282
2283         /*
2284          * Allocate the "Queue Sets" to the various Virtual Interfaces.
2285          * The layout will be established in setup_sge_queues() when the
2286          * adapter is brough up for the first time.
2287          */
2288         qidx = 0;
2289         for_each_port(adapter, pidx) {
2290                 struct port_info *pi = adap2pinfo(adapter, pidx);
2291
2292                 pi->first_qset = qidx;
2293                 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
2294                 qidx += pi->nqsets;
2295         }
2296         s->ethqsets = qidx;
2297
2298         /*
2299          * The Ingress Queue Entry Size for our various Response Queues needs
2300          * to be big enough to accommodate the largest message we can receive
2301          * from the chip/firmware; which is 64 bytes ...
2302          */
2303         iqe_size = 64;
2304
2305         /*
2306          * Set up default Queue Set parameters ...  Start off with the
2307          * shortest interrupt holdoff timer.
2308          */
2309         for (qs = 0; qs < s->max_ethqsets; qs++) {
2310                 struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2311                 struct sge_eth_txq *txq = &s->ethtxq[qs];
2312
2313                 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2314                 rxq->fl.size = 72;
2315                 txq->q.size = 1024;
2316         }
2317
2318         /*
2319          * The firmware event queue is used for link state changes and
2320          * notifications of TX DMA completions.
2321          */
2322         init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2323
2324         /*
2325          * The forwarded interrupt queue is used when we're in MSI interrupt
2326          * mode.  In this mode all interrupts associated with RX queues will
2327          * be forwarded to a single queue which we'll associate with our MSI
2328          * interrupt vector.  The messages dropped in the forwarded interrupt
2329          * queue will indicate which ingress queue needs servicing ...  This
2330          * queue needs to be large enough to accommodate all of the ingress
2331          * queues which are forwarding their interrupt (+1 to prevent the PIDX
2332          * from equalling the CIDX if every ingress queue has an outstanding
2333          * interrupt).  The queue doesn't need to be any larger because no
2334          * ingress queue will ever have more than one outstanding interrupt at
2335          * any time ...
2336          */
2337         init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2338                   iqe_size);
2339 }
2340
2341 /*
2342  * Reduce the number of Ethernet queues across all ports to at most n.
2343  * n provides at least one queue per port.
2344  */
2345 static void __devinit reduce_ethqs(struct adapter *adapter, int n)
2346 {
2347         int i;
2348         struct port_info *pi;
2349
2350         /*
2351          * While we have too many active Ether Queue Sets, interate across the
2352          * "ports" and reduce their individual Queue Set allocations.
2353          */
2354         BUG_ON(n < adapter->params.nports);
2355         while (n < adapter->sge.ethqsets)
2356                 for_each_port(adapter, i) {
2357                         pi = adap2pinfo(adapter, i);
2358                         if (pi->nqsets > 1) {
2359                                 pi->nqsets--;
2360                                 adapter->sge.ethqsets--;
2361                                 if (adapter->sge.ethqsets <= n)
2362                                         break;
2363                         }
2364                 }
2365
2366         /*
2367          * Reassign the starting Queue Sets for each of the "ports" ...
2368          */
2369         n = 0;
2370         for_each_port(adapter, i) {
2371                 pi = adap2pinfo(adapter, i);
2372                 pi->first_qset = n;
2373                 n += pi->nqsets;
2374         }
2375 }
2376
2377 /*
2378  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2379  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2380  * need.  Minimally we need one for every Virtual Interface plus those needed
2381  * for our "extras".  Note that this process may lower the maximum number of
2382  * allowed Queue Sets ...
2383  */
2384 static int __devinit enable_msix(struct adapter *adapter)
2385 {
2386         int i, err, want, need;
2387         struct msix_entry entries[MSIX_ENTRIES];
2388         struct sge *s = &adapter->sge;
2389
2390         for (i = 0; i < MSIX_ENTRIES; ++i)
2391                 entries[i].entry = i;
2392
2393         /*
2394          * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2395          * plus those needed for our "extras" (for example, the firmware
2396          * message queue).  We _need_ at least one "Queue Set" per Virtual
2397          * Interface plus those needed for our "extras".  So now we get to see
2398          * if the song is right ...
2399          */
2400         want = s->max_ethqsets + MSIX_EXTRAS;
2401         need = adapter->params.nports + MSIX_EXTRAS;
2402         while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
2403                 want = err;
2404
2405         if (err == 0) {
2406                 int nqsets = want - MSIX_EXTRAS;
2407                 if (nqsets < s->max_ethqsets) {
2408                         dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2409                                  " for %d Queue Sets\n", nqsets);
2410                         s->max_ethqsets = nqsets;
2411                         if (nqsets < s->ethqsets)
2412                                 reduce_ethqs(adapter, nqsets);
2413                 }
2414                 for (i = 0; i < want; ++i)
2415                         adapter->msix_info[i].vec = entries[i].vector;
2416         } else if (err > 0) {
2417                 pci_disable_msix(adapter->pdev);
2418                 dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
2419                          " not using MSI-X\n", err);
2420         }
2421         return err;
2422 }
2423
2424 static const struct net_device_ops cxgb4vf_netdev_ops   = {
2425         .ndo_open               = cxgb4vf_open,
2426         .ndo_stop               = cxgb4vf_stop,
2427         .ndo_start_xmit         = t4vf_eth_xmit,
2428         .ndo_get_stats          = cxgb4vf_get_stats,
2429         .ndo_set_rx_mode        = cxgb4vf_set_rxmode,
2430         .ndo_set_mac_address    = cxgb4vf_set_mac_addr,
2431         .ndo_validate_addr      = eth_validate_addr,
2432         .ndo_do_ioctl           = cxgb4vf_do_ioctl,
2433         .ndo_change_mtu         = cxgb4vf_change_mtu,
2434         .ndo_vlan_rx_register   = cxgb4vf_vlan_rx_register,
2435 #ifdef CONFIG_NET_POLL_CONTROLLER
2436         .ndo_poll_controller    = cxgb4vf_poll_controller,
2437 #endif
2438 };
2439
2440 /*
2441  * "Probe" a device: initialize a device and construct all kernel and driver
2442  * state needed to manage the device.  This routine is called "init_one" in
2443  * the PF Driver ...
2444  */
2445 static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
2446                                        const struct pci_device_id *ent)
2447 {
2448         static int version_printed;
2449
2450         int pci_using_dac;
2451         int err, pidx;
2452         unsigned int pmask;
2453         struct adapter *adapter;
2454         struct port_info *pi;
2455         struct net_device *netdev;
2456
2457         /*
2458          * Print our driver banner the first time we're called to initialize a
2459          * device.
2460          */
2461         if (version_printed == 0) {
2462                 printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2463                 version_printed = 1;
2464         }
2465
2466         /*
2467          * Initialize generic PCI device state.
2468          */
2469         err = pci_enable_device(pdev);
2470         if (err) {
2471                 dev_err(&pdev->dev, "cannot enable PCI device\n");
2472                 return err;
2473         }
2474
2475         /*
2476          * Reserve PCI resources for the device.  If we can't get them some
2477          * other driver may have already claimed the device ...
2478          */
2479         err = pci_request_regions(pdev, KBUILD_MODNAME);
2480         if (err) {
2481                 dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2482                 goto err_disable_device;
2483         }
2484
2485         /*
2486          * Set up our DMA mask: try for 64-bit address masking first and
2487          * fall back to 32-bit if we can't get 64 bits ...
2488          */
2489         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2490         if (err == 0) {
2491                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2492                 if (err) {
2493                         dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2494                                 " coherent allocations\n");
2495                         goto err_release_regions;
2496                 }
2497                 pci_using_dac = 1;
2498         } else {
2499                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2500                 if (err != 0) {
2501                         dev_err(&pdev->dev, "no usable DMA configuration\n");
2502                         goto err_release_regions;
2503                 }
2504                 pci_using_dac = 0;
2505         }
2506
2507         /*
2508          * Enable bus mastering for the device ...
2509          */
2510         pci_set_master(pdev);
2511
2512         /*
2513          * Allocate our adapter data structure and attach it to the device.
2514          */
2515         adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2516         if (!adapter) {
2517                 err = -ENOMEM;
2518                 goto err_release_regions;
2519         }
2520         pci_set_drvdata(pdev, adapter);
2521         adapter->pdev = pdev;
2522         adapter->pdev_dev = &pdev->dev;
2523
2524         /*
2525          * Initialize SMP data synchronization resources.
2526          */
2527         spin_lock_init(&adapter->stats_lock);
2528
2529         /*
2530          * Map our I/O registers in BAR0.
2531          */
2532         adapter->regs = pci_ioremap_bar(pdev, 0);
2533         if (!adapter->regs) {
2534                 dev_err(&pdev->dev, "cannot map device registers\n");
2535                 err = -ENOMEM;
2536                 goto err_free_adapter;
2537         }
2538
2539         /*
2540          * Initialize adapter level features.
2541          */
2542         adapter->name = pci_name(pdev);
2543         adapter->msg_enable = dflt_msg_enable;
2544         err = adap_init0(adapter);
2545         if (err)
2546                 goto err_unmap_bar;
2547
2548         /*
2549          * Allocate our "adapter ports" and stitch everything together.
2550          */
2551         pmask = adapter->params.vfres.pmask;
2552         for_each_port(adapter, pidx) {
2553                 int port_id, viid;
2554
2555                 /*
2556                  * We simplistically allocate our virtual interfaces
2557                  * sequentially across the port numbers to which we have
2558                  * access rights.  This should be configurable in some manner
2559                  * ...
2560                  */
2561                 if (pmask == 0)
2562                         break;
2563                 port_id = ffs(pmask) - 1;
2564                 pmask &= ~(1 << port_id);
2565                 viid = t4vf_alloc_vi(adapter, port_id);
2566                 if (viid < 0) {
2567                         dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2568                                 " err=%d\n", port_id, viid);
2569                         err = viid;
2570                         goto err_free_dev;
2571                 }
2572
2573                 /*
2574                  * Allocate our network device and stitch things together.
2575                  */
2576                 netdev = alloc_etherdev_mq(sizeof(struct port_info),
2577                                            MAX_PORT_QSETS);
2578                 if (netdev == NULL) {
2579                         dev_err(&pdev->dev, "cannot allocate netdev for"
2580                                 " port %d\n", port_id);
2581                         t4vf_free_vi(adapter, viid);
2582                         err = -ENOMEM;
2583                         goto err_free_dev;
2584                 }
2585                 adapter->port[pidx] = netdev;
2586                 SET_NETDEV_DEV(netdev, &pdev->dev);
2587                 pi = netdev_priv(netdev);
2588                 pi->adapter = adapter;
2589                 pi->pidx = pidx;
2590                 pi->port_id = port_id;
2591                 pi->viid = viid;
2592
2593                 /*
2594                  * Initialize the starting state of our "port" and register
2595                  * it.
2596                  */
2597                 pi->xact_addr_filt = -1;
2598                 netif_carrier_off(netdev);
2599                 netdev->irq = pdev->irq;
2600
2601                 netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2602                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2603                         NETIF_F_HW_VLAN_TX | NETIF_F_RXCSUM;
2604                 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2605                         NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2606                         NETIF_F_HIGHDMA;
2607                 netdev->features = netdev->hw_features |
2608                         NETIF_F_HW_VLAN_RX;
2609                 if (pci_using_dac)
2610                         netdev->features |= NETIF_F_HIGHDMA;
2611
2612                 netdev->netdev_ops = &cxgb4vf_netdev_ops;
2613                 SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2614
2615                 /*
2616                  * Initialize the hardware/software state for the port.
2617                  */
2618                 err = t4vf_port_init(adapter, pidx);
2619                 if (err) {
2620                         dev_err(&pdev->dev, "cannot initialize port %d\n",
2621                                 pidx);
2622                         goto err_free_dev;
2623                 }
2624         }
2625
2626         /*
2627          * The "card" is now ready to go.  If any errors occur during device
2628          * registration we do not fail the whole "card" but rather proceed
2629          * only with the ports we manage to register successfully.  However we
2630          * must register at least one net device.
2631          */
2632         for_each_port(adapter, pidx) {
2633                 netdev = adapter->port[pidx];
2634                 if (netdev == NULL)
2635                         continue;
2636
2637                 err = register_netdev(netdev);
2638                 if (err) {
2639                         dev_warn(&pdev->dev, "cannot register net device %s,"
2640                                  " skipping\n", netdev->name);
2641                         continue;
2642                 }
2643
2644                 set_bit(pidx, &adapter->registered_device_map);
2645         }
2646         if (adapter->registered_device_map == 0) {
2647                 dev_err(&pdev->dev, "could not register any net devices\n");
2648                 goto err_free_dev;
2649         }
2650
2651         /*
2652          * Set up our debugfs entries.
2653          */
2654         if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2655                 adapter->debugfs_root =
2656                         debugfs_create_dir(pci_name(pdev),
2657                                            cxgb4vf_debugfs_root);
2658                 if (IS_ERR_OR_NULL(adapter->debugfs_root))
2659                         dev_warn(&pdev->dev, "could not create debugfs"
2660                                  " directory");
2661                 else
2662                         setup_debugfs(adapter);
2663         }
2664
2665         /*
2666          * See what interrupts we'll be using.  If we've been configured to
2667          * use MSI-X interrupts, try to enable them but fall back to using
2668          * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2669          * get MSI interrupts we bail with the error.
2670          */
2671         if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2672                 adapter->flags |= USING_MSIX;
2673         else {
2674                 err = pci_enable_msi(pdev);
2675                 if (err) {
2676                         dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2677                                 " err=%d\n",
2678                                 msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2679                         goto err_free_debugfs;
2680                 }
2681                 adapter->flags |= USING_MSI;
2682         }
2683
2684         /*
2685          * Now that we know how many "ports" we have and what their types are,
2686          * and how many Queue Sets we can support, we can configure our queue
2687          * resources.
2688          */
2689         cfg_queues(adapter);
2690
2691         /*
2692          * Print a short notice on the existence and configuration of the new
2693          * VF network device ...
2694          */
2695         for_each_port(adapter, pidx) {
2696                 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2697                          adapter->port[pidx]->name,
2698                          (adapter->flags & USING_MSIX) ? "MSI-X" :
2699                          (adapter->flags & USING_MSI)  ? "MSI" : "");
2700         }
2701
2702         /*
2703          * Return success!
2704          */
2705         return 0;
2706
2707         /*
2708          * Error recovery and exit code.  Unwind state that's been created
2709          * so far and return the error.
2710          */
2711
2712 err_free_debugfs:
2713         if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2714                 cleanup_debugfs(adapter);
2715                 debugfs_remove_recursive(adapter->debugfs_root);
2716         }
2717
2718 err_free_dev:
2719         for_each_port(adapter, pidx) {
2720                 netdev = adapter->port[pidx];
2721                 if (netdev == NULL)
2722                         continue;
2723                 pi = netdev_priv(netdev);
2724                 t4vf_free_vi(adapter, pi->viid);
2725                 if (test_bit(pidx, &adapter->registered_device_map))
2726                         unregister_netdev(netdev);
2727                 free_netdev(netdev);
2728         }
2729
2730 err_unmap_bar:
2731         iounmap(adapter->regs);
2732
2733 err_free_adapter:
2734         kfree(adapter);
2735         pci_set_drvdata(pdev, NULL);
2736
2737 err_release_regions:
2738         pci_release_regions(pdev);
2739         pci_set_drvdata(pdev, NULL);
2740         pci_clear_master(pdev);
2741
2742 err_disable_device:
2743         pci_disable_device(pdev);
2744
2745         return err;
2746 }
2747
2748 /*
2749  * "Remove" a device: tear down all kernel and driver state created in the
2750  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
2751  * that this is called "remove_one" in the PF Driver.)
2752  */
2753 static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
2754 {
2755         struct adapter *adapter = pci_get_drvdata(pdev);
2756
2757         /*
2758          * Tear down driver state associated with device.
2759          */
2760         if (adapter) {
2761                 int pidx;
2762
2763                 /*
2764                  * Stop all of our activity.  Unregister network port,
2765                  * disable interrupts, etc.
2766                  */
2767                 for_each_port(adapter, pidx)
2768                         if (test_bit(pidx, &adapter->registered_device_map))
2769                                 unregister_netdev(adapter->port[pidx]);
2770                 t4vf_sge_stop(adapter);
2771                 if (adapter->flags & USING_MSIX) {
2772                         pci_disable_msix(adapter->pdev);
2773                         adapter->flags &= ~USING_MSIX;
2774                 } else if (adapter->flags & USING_MSI) {
2775                         pci_disable_msi(adapter->pdev);
2776                         adapter->flags &= ~USING_MSI;
2777                 }
2778
2779                 /*
2780                  * Tear down our debugfs entries.
2781                  */
2782                 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2783                         cleanup_debugfs(adapter);
2784                         debugfs_remove_recursive(adapter->debugfs_root);
2785                 }
2786
2787                 /*
2788                  * Free all of the various resources which we've acquired ...
2789                  */
2790                 t4vf_free_sge_resources(adapter);
2791                 for_each_port(adapter, pidx) {
2792                         struct net_device *netdev = adapter->port[pidx];
2793                         struct port_info *pi;
2794
2795                         if (netdev == NULL)
2796                                 continue;
2797
2798                         pi = netdev_priv(netdev);
2799                         t4vf_free_vi(adapter, pi->viid);
2800                         free_netdev(netdev);
2801                 }
2802                 iounmap(adapter->regs);
2803                 kfree(adapter);
2804                 pci_set_drvdata(pdev, NULL);
2805         }
2806
2807         /*
2808          * Disable the device and release its PCI resources.
2809          */
2810         pci_disable_device(pdev);
2811         pci_clear_master(pdev);
2812         pci_release_regions(pdev);
2813 }
2814
2815 /*
2816  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2817  * delivery.
2818  */
2819 static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2820 {
2821         struct adapter *adapter;
2822         int pidx;
2823
2824         adapter = pci_get_drvdata(pdev);
2825         if (!adapter)
2826                 return;
2827
2828         /*
2829          * Disable all Virtual Interfaces.  This will shut down the
2830          * delivery of all ingress packets into the chip for these
2831          * Virtual Interfaces.
2832          */
2833         for_each_port(adapter, pidx) {
2834                 struct net_device *netdev;
2835                 struct port_info *pi;
2836
2837                 if (!test_bit(pidx, &adapter->registered_device_map))
2838                         continue;
2839
2840                 netdev = adapter->port[pidx];
2841                 if (!netdev)
2842                         continue;
2843
2844                 pi = netdev_priv(netdev);
2845                 t4vf_enable_vi(adapter, pi->viid, false, false);
2846         }
2847
2848         /*
2849          * Free up all Queues which will prevent further DMA and
2850          * Interrupts allowing various internal pathways to drain.
2851          */
2852         t4vf_free_sge_resources(adapter);
2853 }
2854
2855 /*
2856  * PCI Device registration data structures.
2857  */
2858 #define CH_DEVICE(devid, idx) \
2859         { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2860
2861 static struct pci_device_id cxgb4vf_pci_tbl[] = {
2862         CH_DEVICE(0xb000, 0),   /* PE10K FPGA */
2863         CH_DEVICE(0x4800, 0),   /* T440-dbg */
2864         CH_DEVICE(0x4801, 0),   /* T420-cr */
2865         CH_DEVICE(0x4802, 0),   /* T422-cr */
2866         CH_DEVICE(0x4803, 0),   /* T440-cr */
2867         CH_DEVICE(0x4804, 0),   /* T420-bch */
2868         CH_DEVICE(0x4805, 0),   /* T440-bch */
2869         CH_DEVICE(0x4806, 0),   /* T460-ch */
2870         CH_DEVICE(0x4807, 0),   /* T420-so */
2871         CH_DEVICE(0x4808, 0),   /* T420-cx */
2872         CH_DEVICE(0x4809, 0),   /* T420-bt */
2873         CH_DEVICE(0x480a, 0),   /* T404-bt */
2874         { 0, }
2875 };
2876
2877 MODULE_DESCRIPTION(DRV_DESC);
2878 MODULE_AUTHOR("Chelsio Communications");
2879 MODULE_LICENSE("Dual BSD/GPL");
2880 MODULE_VERSION(DRV_VERSION);
2881 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2882
2883 static struct pci_driver cxgb4vf_driver = {
2884         .name           = KBUILD_MODNAME,
2885         .id_table       = cxgb4vf_pci_tbl,
2886         .probe          = cxgb4vf_pci_probe,
2887         .remove         = __devexit_p(cxgb4vf_pci_remove),
2888         .shutdown       = __devexit_p(cxgb4vf_pci_shutdown),
2889 };
2890
2891 /*
2892  * Initialize global driver state.
2893  */
2894 static int __init cxgb4vf_module_init(void)
2895 {
2896         int ret;
2897
2898         /*
2899          * Vet our module parameters.
2900          */
2901         if (msi != MSI_MSIX && msi != MSI_MSI) {
2902                 printk(KERN_WARNING KBUILD_MODNAME
2903                        ": bad module parameter msi=%d; must be %d"
2904                        " (MSI-X or MSI) or %d (MSI)\n",
2905                        msi, MSI_MSIX, MSI_MSI);
2906                 return -EINVAL;
2907         }
2908
2909         /* Debugfs support is optional, just warn if this fails */
2910         cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2911         if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2912                 printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2913                        " debugfs entry, continuing\n");
2914
2915         ret = pci_register_driver(&cxgb4vf_driver);
2916         if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2917                 debugfs_remove(cxgb4vf_debugfs_root);
2918         return ret;
2919 }
2920
2921 /*
2922  * Tear down global driver state.
2923  */
2924 static void __exit cxgb4vf_module_exit(void)
2925 {
2926         pci_unregister_driver(&cxgb4vf_driver);
2927         debugfs_remove(cxgb4vf_debugfs_root);
2928 }
2929
/* Hook the load/unload routines above into the module machinery. */
module_exit(cxgb4vf_module_exit);
module_init(cxgb4vf_module_init);