1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
2  * All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  * NON INFRINGEMENT.  See the GNU General Public License for more
12  * details.
13  */
14
15 /* This driver lives in a spar partition and registers with the visorbus
16  * driver for ethernet IO channels. It creates netdev devices, forwards
17  * transmits to the IO Partition via the IO channel, and accepts receives
18  * from the IO Partition via the IO channel.
19  */
20
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
27
28 #include "visorbus.h"
29 #include "iochannel.h"
30
31 #define VISORNIC_INFINITE_RSP_WAIT 0
32 #define VISORNICSOPENMAX 32
33 #define MAXDEVICES     16384
34
35 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
36  *         = 163840 bytes
37  */
38 #define MAX_BUF 163840
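/* NAPI_WEIGHT is the rx budget given to the NAPI poll routine on each poll cycle */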
39 #define NAPI_WEIGHT 64
40
41 static int visornic_probe(struct visor_device *dev);
42 static void visornic_remove(struct visor_device *dev);
43 static int visornic_pause(struct visor_device *dev,
44                           visorbus_state_complete_func complete_func);
45 static int visornic_resume(struct visor_device *dev,
46                            visorbus_state_complete_func complete_func);
47
48 /* DEBUGFS declarations */
49 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
50                                  size_t len, loff_t *offset);
51 static ssize_t enable_ints_write(struct file *file, const char __user *buf,
52                                  size_t len, loff_t *ppos);
53 static struct dentry *visornic_debugfs_dir;
54 static const struct file_operations debugfs_info_fops = {
55         .read = info_debugfs_read,
56 };
57
58 static const struct file_operations debugfs_enable_ints_fops = {
59         .write = enable_ints_write,
60 };
61
62 /* GUIDs for the network channel type supported by this driver.  */
63 static struct visor_channeltype_descriptor visornic_channel_types[] = {
64         /* Note that the only channel type we expect to be reported by the
65          * bus driver is the SPAR_VNIC channel.
66          */
67         { SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
68         { NULL_UUID_LE, NULL }
69 };
70 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
71 /*
72  * FIXME XXX: This next line of code must be fixed and removed before
73  * acceptance into the 'normal' part of the kernel.  It is only here as a place
74  * holder to get module autoloading functionality working for visorbus.  Code
75  * must be added to scripts/mod/file2alias.c, etc., to get this working
76  * properly.
77  */
78 MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
79
80 /* This is used to tell the visor bus driver which types of visor devices
81  * we support, and what functions to call when a visor device that we support
82  * is attached or removed.
83  */
84 static struct visor_driver visornic_driver = {
85         .name = "visornic",
86         .version = "1.0.0.0",
87         .vertag = NULL,
88         .owner = THIS_MODULE,
89         .channel_types = visornic_channel_types,
90         .probe = visornic_probe,
91         .remove = visornic_remove,
92         .pause = visornic_pause,
93         .resume = visornic_resume,
94         .channel_interrupt = NULL,
95 };
96
97 struct chanstat {
98         unsigned long got_rcv;
99         unsigned long got_enbdisack;
100         unsigned long got_xmit_done;
101         unsigned long xmit_fail;
102         unsigned long sent_enbdis;
103         unsigned long sent_promisc;
104         unsigned long sent_post;
105         unsigned long sent_post_failed;
106         unsigned long sent_xmit;
107         unsigned long reject_count;
108         unsigned long extra_rcvbufs_sent;
109 };
110
111 struct visornic_devdata {
112         /* 0 disabled 1 enabled to receive */
113         unsigned short enabled;
114         /* NET_RCV_ENABLE/DISABLE acked by IOPART */
115         unsigned short enab_dis_acked;
116
117         struct visor_device *dev;
118         struct net_device *netdev;
119         struct net_device_stats net_stats;
120         atomic_t interrupt_rcvd;
121         wait_queue_head_t rsp_queue;
122         struct sk_buff **rcvbuf;
123         /* incarnation_id lets IOPART know about re-birth */
124         u64 incarnation_id;
125         /* flags as they were prior to set_multicast_list */
126         unsigned short old_flags;
127         atomic_t usage; /* count of users */
128
129         /* number of rcv buffers the vnic will post */
130         int num_rcv_bufs;
131         int num_rcv_bufs_could_not_alloc;
132         atomic_t num_rcvbuf_in_iovm;
133         unsigned long alloc_failed_in_if_needed_cnt;
134         unsigned long alloc_failed_in_repost_rtn_cnt;
135
136         /* absolute max number of outstanding xmits - should never hit this */
137         unsigned long max_outstanding_net_xmits;
138         /* high water mark for calling netif_stop_queue() */
139         unsigned long upper_threshold_net_xmits;
140         /* high water mark for calling netif_wake_queue() */
141         unsigned long lower_threshold_net_xmits;
142         /* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
143         struct sk_buff_head xmitbufhead;
144
145         visorbus_state_complete_func server_down_complete_func;
146         struct work_struct timeout_reset;
147         /* cmdrsp_rcv is used for posting/unposting rcv buffers  */
148         struct uiscmdrsp *cmdrsp_rcv;
149         /* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
150         struct uiscmdrsp *xmit_cmdrsp;
151
152         bool server_down;                /* IOPART is down */
153         bool server_change_state;        /* Processing SERVER_CHANGESTATE msg */
154         bool going_away;                 /* device is being torn down */
155         struct dentry *eth_debugfs_dir;
156         u64 interrupts_rcvd;
157         u64 interrupts_notme;
158         u64 interrupts_disabled;
159         u64 busy_cnt;
160         spinlock_t priv_lock;  /* spinlock to access devdata structures */
161
162         /* flow control counter */
163         u64 flow_control_upper_hits;
164         u64 flow_control_lower_hits;
165
166         /* debug counters */
167         unsigned long n_rcv0;                   /* # rcvs of 0 buffers */
168         unsigned long n_rcv1;                   /* # rcvs of 1 buffers */
169         unsigned long n_rcv2;                   /* # rcvs of 2 buffers */
170         unsigned long n_rcvx;                   /* # rcvs of >2 buffers */
171         unsigned long found_repost_rcvbuf_cnt;  /* # repost_rcvbuf_cnt */
172         unsigned long repost_found_skb_cnt;     /* # of found the skb */
173         unsigned long n_repost_deficit;         /* # of lost rcv buffers */
174         unsigned long bad_rcv_buf; /* # of unknown rcv skb  not freed */
175         unsigned long n_rcv_packets_not_accepted;/* # bogus rcv packets */
176
177         int queuefullmsg_logged;
178         struct chanstat chstat;
179         struct timer_list irq_poll_timer;
180         struct napi_struct napi;
181         struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
182 };
183
184 static int visornic_poll(struct napi_struct *napi, int budget);
185 static void poll_for_irq(unsigned long v);
186
187 /**
188  *      visor_copy_fragsinfo_from_skb - copy fragment info from an skb
189  *      @skb: skbuff that we are pulling the frags from
190  *      @firstfraglen: length of first fragment in skb
191  *      @frags_max: max len of frags array
192  *      @frags: frags array filled in on output
193  *
194  *      Copy the fragment list in the SKB to a phys_info
195  *      array that the IOPART understands.
196  *      Return value indicates number of entries filled in frags
197  *      Negative values indicate an error.
198  */
199 static int
200 visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
201                               unsigned int frags_max,
202                               struct phys_info frags[])
203 {
204         unsigned int count = 0, frag, size, offset = 0, numfrags;
205         unsigned int total_count;
206
207         numfrags = skb_shinfo(skb)->nr_frags;
208
209         /* Compute the number of fragments this skb has, and if it's more
210          * than the frag array can hold, linearize the skb
211          */
212         total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
213         if (firstfraglen % PI_PAGE_SIZE)
214                 total_count++;
215
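        /* Example (assuming an illustrative PI_PAGE_SIZE of 4096): an skb with
         * nr_frags = 2 and firstfraglen = 6000 needs 2 + 1 + 1 = 4 entries -
         * one per page fragment plus one per page of linear data.
         */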
216         if (total_count > frags_max) {
217                 if (skb_linearize(skb))
218                         return -EINVAL;
219                 numfrags = skb_shinfo(skb)->nr_frags;
220                 firstfraglen = 0;
221         }
222
223         while (firstfraglen) {
224                 if (count == frags_max)
225                         return -EINVAL;
226
227                 frags[count].pi_pfn =
228                         page_to_pfn(virt_to_page(skb->data + offset));
229                 frags[count].pi_off =
230                         (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
231                 size = min_t(unsigned int, firstfraglen,
232                              PI_PAGE_SIZE - frags[count].pi_off);
233
234                 /* can take smallest of firstfraglen (what's left) OR
235                  * bytes left in the page
236                  */
237                 frags[count].pi_len = size;
238                 firstfraglen -= size;
239                 offset += size;
240                 count++;
241         }
242         if (numfrags) {
243                 if ((count + numfrags) > frags_max)
244                         return -EINVAL;
245
246                 for (frag = 0; frag < numfrags; frag++) {
247                         count = add_physinfo_entries(page_to_pfn(
248                                 skb_frag_page(&skb_shinfo(skb)->frags[frag])),
249                                               skb_shinfo(skb)->frags[frag].
250                                               page_offset,
251                                               skb_shinfo(skb)->frags[frag].
252                                               size, count, frags_max, frags);
253                         /* add_physinfo_entries only returns
254                          * zero if the frags array is out of room
255                          * That should never happen because we
256                          * fail above, if count+numfrags > frags_max.
257                          */
258                         if (!count)
259                                 return -EINVAL;
260                 }
261         }
262         if (skb_shinfo(skb)->frag_list) {
263                 struct sk_buff *skbinlist;
264                 int c;
265
266                 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
267                      skbinlist = skbinlist->next) {
268                         c = visor_copy_fragsinfo_from_skb(skbinlist,
269                                                           skbinlist->len -
270                                                           skbinlist->data_len,
271                                                           frags_max - count,
272                                                           &frags[count]);
273                         if (c < 0)
274                                 return c;
275                         count += c;
276                 }
277         }
278         return count;
279 }
280
281 static ssize_t enable_ints_write(struct file *file,
282                                  const char __user *buffer,
283                                  size_t count, loff_t *ppos)
284 {
285         /* We don't want to break the ABI here by having a debugfs
286          * file that no longer exists or is no longer writable, so
287          * let's just make this a vestigial function.
288          */
289         return count;
290 }
291
292 /**
293  *      visornic_serverdown_complete - IOPART went down, pause device
294  *      @devdata: visornic device whose IO partition has gone down
295  *
296  *      The IO partition has gone down and we need to do some cleanup
297  *      for when it comes back. Treat the IO partition as the link
298  *      being down.
299  *      Returns void.
300  */
301 static void
302 visornic_serverdown_complete(struct visornic_devdata *devdata)
303 {
304         struct net_device *netdev;
305
306         netdev = devdata->netdev;
307
308         /* Stop polling for interrupts */
309         del_timer_sync(&devdata->irq_poll_timer);
310
311         rtnl_lock();
312         dev_close(netdev);
313         rtnl_unlock();
314
315         atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
316         devdata->chstat.sent_xmit = 0;
317         devdata->chstat.got_xmit_done = 0;
318
319         if (devdata->server_down_complete_func)
320                 (*devdata->server_down_complete_func)(devdata->dev, 0);
321
322         devdata->server_down = true;
323         devdata->server_change_state = false;
324         devdata->server_down_complete_func = NULL;
325 }
326
327 /**
328  *      visornic_serverdown - Command has notified us that IOPART is down
329  *      @devdata: device that is being managed by IOPART
330  *
331  *      Handle the server-down request, making sure we haven't already
332  *      handled the server change-state event.
333  *      Returns 0 on success, negative errno on error.
334  */
335 static int
336 visornic_serverdown(struct visornic_devdata *devdata,
337                     visorbus_state_complete_func complete_func)
338 {
339         unsigned long flags;
340         int err;
341
342         spin_lock_irqsave(&devdata->priv_lock, flags);
343         if (devdata->server_change_state) {
344                 dev_dbg(&devdata->dev->device, "%s changing state\n",
345                         __func__);
346                 err = -EINVAL;
347                 goto err_unlock;
348         }
349         if (devdata->server_down) {
350                 dev_dbg(&devdata->dev->device, "%s already down\n",
351                         __func__);
352                 err = -EINVAL;
353                 goto err_unlock;
354         }
355         if (devdata->going_away) {
356                 dev_dbg(&devdata->dev->device,
357                         "%s aborting because device removal pending\n",
358                         __func__);
359                 err = -ENODEV;
360                 goto err_unlock;
361         }
362         devdata->server_change_state = true;
363         devdata->server_down_complete_func = complete_func;
364         spin_unlock_irqrestore(&devdata->priv_lock, flags);
365
366         visornic_serverdown_complete(devdata);
367         return 0;
368
369 err_unlock:
370         spin_unlock_irqrestore(&devdata->priv_lock, flags);
371         return err;
372 }
373
374 /**
375  *      alloc_rcv_buf   - alloc rcv buffer to be given to the IO Partition.
376  *      @netdev: network adapter the rcv bufs are attached to.
377  *
378  *      Create an sk_buff (rcv_buf) that will be passed to the IO Partition
379  *      so that it can write rcv data into our memory space.
380  *      Returns a pointer to the sk_buff, or NULL on failure
381  */
382 static struct sk_buff *
383 alloc_rcv_buf(struct net_device *netdev)
384 {
385         struct sk_buff *skb;
386
387         /* NOTE: the first fragment in each rcv buffer is pointed to by
388          * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
389          * in length, so the first frag is large enough to hold 1514.
390          */
391         skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
392         if (!skb)
393                 return NULL;
394         skb->dev = netdev;
395         /* current value of mtu doesn't come into play here; large
396          * packets will just end up using multiple rcv buffers all of
397          * packets will just end up using multiple rcv buffers all of
398          * the same size.
399         skb->len = RCVPOST_BUF_SIZE;
400         /* set data_len explicitly for clarity; alloc_skb already zeroes it. */
401         skb->data_len = 0;
402         return skb;
403 }
404
405 /**
406  *      post_skb        - post a skb to the IO Partition.
407  *      @cmdrsp: cmdrsp packet to be sent to the IO Partition
408  *      @devdata: visornic_devdata to post the skb to
409  *      @skb: skb to give to the IO partition
410  *
411  *      Send the skb to the IO Partition.
412  *      Returns void
413  */
414 static inline void
415 post_skb(struct uiscmdrsp *cmdrsp,
416          struct visornic_devdata *devdata, struct sk_buff *skb)
417 {
418         cmdrsp->net.buf = skb;
419         cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
420         cmdrsp->net.rcvpost.frag.pi_off =
421                 (unsigned long)skb->data & PI_PAGE_MASK;
422         cmdrsp->net.rcvpost.frag.pi_len = skb->len;
423         cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
424
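        /* The receive buffer is described by a single phys_info fragment, so
         * it must fit within one page; if the data would cross a page boundary
         * the buffer is simply not posted.
         */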
425         if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
426                 cmdrsp->net.type = NET_RCV_POST;
427                 cmdrsp->cmdtype = CMD_NET_TYPE;
428                 if (visorchannel_signalinsert(devdata->dev->visorchannel,
429                                               IOCHAN_TO_IOPART,
430                                               cmdrsp)) {
431                         atomic_inc(&devdata->num_rcvbuf_in_iovm);
432                         devdata->chstat.sent_post++;
433                 } else {
434                         devdata->chstat.sent_post_failed++;
435                 }
436         }
437 }
438
439 /**
440  *      send_enbdis     - send NET_RCV_ENBDIS to IO Partition
441  *      @netdev: netdevice we are enabling/disabling, used as context
442  *               in the response
443  *      @state: enable = 1/disable = 0
444  *      @devdata: visornic device we are enabling/disabling
445  *
446  *      Send the enable/disable message to the IO Partition.
447  *      Returns void
448  */
449 static void
450 send_enbdis(struct net_device *netdev, int state,
451             struct visornic_devdata *devdata)
452 {
453         devdata->cmdrsp_rcv->net.enbdis.enable = state;
454         devdata->cmdrsp_rcv->net.enbdis.context = netdev;
455         devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
456         devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
457         if (visorchannel_signalinsert(devdata->dev->visorchannel,
458                                       IOCHAN_TO_IOPART,
459                                       devdata->cmdrsp_rcv))
460                 devdata->chstat.sent_enbdis++;
461 }
462
463 /**
464  *      visornic_disable_with_timeout - Disable network adapter
465  *      @netdev: netdevice to disable
466  *      @timeout: timeout to wait for disable
467  *
468  *      Disable the network adapter and inform the IO Partition that we
469  *      are disabled, reclaim memory from rcv bufs.
470  *      Returns 0 on success, negative if the IO Partition fails to
471  *      respond.
472  *
473  */
474 static int
475 visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
476 {
477         struct visornic_devdata *devdata = netdev_priv(netdev);
478         int i;
479         unsigned long flags;
480         int wait = 0;
481
482         /* send a msg telling the other end we are stopping incoming pkts */
483         spin_lock_irqsave(&devdata->priv_lock, flags);
484         devdata->enabled = 0;
485         devdata->enab_dis_acked = 0; /* must wait for ack */
486         spin_unlock_irqrestore(&devdata->priv_lock, flags);
487
488         /* send disable and wait for ack -- don't hold lock when sending
489          * disable because if the queue is full, insert might sleep.
490          */
491         send_enbdis(netdev, 0, devdata);
492
493         /* wait for ack to arrive before we try to free rcv buffers
494          * NOTE: the other end automatically unposts the rcv buffers
495          * when it gets a disable.
496          */
497         spin_lock_irqsave(&devdata->priv_lock, flags);
498         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
499                (wait < timeout)) {
500                 if (devdata->enab_dis_acked)
501                         break;
502                 if (devdata->server_down || devdata->server_change_state) {
503                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
504                         dev_dbg(&netdev->dev, "%s server went away\n",
505                                 __func__);
506                         return -EIO;
507                 }
508                 set_current_state(TASK_INTERRUPTIBLE);
509                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
510                 wait += schedule_timeout(msecs_to_jiffies(10));
511                 spin_lock_irqsave(&devdata->priv_lock, flags);
512         }
513
514         /* Wait for usage to go to 1 (no other users) before freeing
515          * rcv buffers
516          */
517         if (atomic_read(&devdata->usage) > 1) {
518                 while (1) {
519                         set_current_state(TASK_INTERRUPTIBLE);
520                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
521                         schedule_timeout(msecs_to_jiffies(10));
522                         spin_lock_irqsave(&devdata->priv_lock, flags);
523                         if (atomic_read(&devdata->usage) == 1)
524                                 break;
525                 }
526         }
527         /* we've set enabled to 0, so we can give up the lock. */
528         spin_unlock_irqrestore(&devdata->priv_lock, flags);
529
530         /* stop the transmit queue so nothing more can be transmitted */
531         netif_stop_queue(netdev);
532
533         napi_disable(&devdata->napi);
534
535         skb_queue_purge(&devdata->xmitbufhead);
536
537         /* Free rcv buffers - the other end has automatically unposted them
538          * on disable
539          */
540         for (i = 0; i < devdata->num_rcv_bufs; i++) {
541                 if (devdata->rcvbuf[i]) {
542                         kfree_skb(devdata->rcvbuf[i]);
543                         devdata->rcvbuf[i] = NULL;
544                 }
545         }
546
547         return 0;
548 }
549
550 /**
551  *      init_rcv_bufs - initialize receive bufs and send them to the IO Part
552  *      @netdev: struct netdevice
553  *      @devdata: visornic_devdata
554  *
555  *      Allocate rcv buffers and post them to the IO Partition.
556  *      Return 0 for success, and negative for failure.
557  */
558 static int
559 init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
560 {
561         int i, count;
562
563         /* allocate a fixed number of receive buffers to post to uisnic;
564          * post the receive buffers after we've allocated the required amount
565          */
566         for (i = 0; i < devdata->num_rcv_bufs; i++) {
567                 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
568                 if (!devdata->rcvbuf[i])
569                         break; /* if we failed to allocate one let us stop */
570         }
571         if (i == 0) /* couldn't even allocate one -- bail out */
572                 return -ENOMEM;
573         count = i;
574
575         /* Ensure we can alloc 2/3rd of the requested number of buffers.
576          * 2/3 is an arbitrary choice; used also in ndis init.c
577          */
578         if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
579                 /* free receive buffers we did alloc and then bail out */
580                 for (i = 0; i < count; i++) {
581                         kfree_skb(devdata->rcvbuf[i]);
582                         devdata->rcvbuf[i] = NULL;
583                 }
584                 return -ENOMEM;
585         }
586
587         /* post receive buffers to receive incoming input - without holding
588          * lock - we've not enabled nor started the queue so there shouldn't
589          * be any rcv or xmit activity
590          */
591         for (i = 0; i < count; i++)
592                 post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);
593
594         return 0;
595 }
596
597 /**
598  *      visornic_enable_with_timeout    - send enable to IO Part
599  *      @netdev: struct net_device
600  *      @timeout: Time to wait for the ACK from the enable
601  *
602  *      Sends enable to the IOVM, and inits and posts receive buffers to it.
603  *      timeout is defined in msecs (timeout of 0 specifies infinite wait)
604  *      Return 0 for success, negative for failure.
605  */
606 static int
607 visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
608 {
609         int i;
610         struct visornic_devdata *devdata = netdev_priv(netdev);
611         unsigned long flags;
612         int wait = 0;
613
614         /* NOTE: the other end automatically unposts the rcv buffers when it
615          * gets a disable.
616          */
617         i = init_rcv_bufs(netdev, devdata);
618         if (i < 0) {
619                 dev_err(&netdev->dev,
620                         "%s failed to init rcv bufs (%d)\n", __func__, i);
621                 return i;
622         }
623
624         spin_lock_irqsave(&devdata->priv_lock, flags);
625         devdata->enabled = 1;
626         devdata->enab_dis_acked = 0;
627
628         /* now we're ready, let's send an ENB to uisnic but until we get
629          * an ACK back from uisnic, we'll drop the packets
630          */
631         devdata->n_rcv_packets_not_accepted = 0;
632         spin_unlock_irqrestore(&devdata->priv_lock, flags);
633
634         /* send enable and wait for ack -- don't hold lock when sending enable
635          * because if the queue is full, insert might sleep.
636          */
637         napi_enable(&devdata->napi);
638         send_enbdis(netdev, 1, devdata);
639
640         spin_lock_irqsave(&devdata->priv_lock, flags);
641         while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
642                (wait < timeout)) {
643                 if (devdata->enab_dis_acked)
644                         break;
645                 if (devdata->server_down || devdata->server_change_state) {
646                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
647                         dev_dbg(&netdev->dev, "%s server went away\n",
648                                 __func__);
649                         return -EIO;
650                 }
651                 set_current_state(TASK_INTERRUPTIBLE);
652                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
653                 wait += schedule_timeout(msecs_to_jiffies(10));
654                 spin_lock_irqsave(&devdata->priv_lock, flags);
655         }
656
657         spin_unlock_irqrestore(&devdata->priv_lock, flags);
658
659         if (!devdata->enab_dis_acked) {
660                 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
661                 return -EIO;
662         }
663
664         netif_start_queue(netdev);
665
666         return 0;
667 }
668
669 /**
670  *      visornic_timeout_reset  - handle xmit timeout resets
671  *      @work: work item that scheduled the work
672  *
673  *      Transmit timeouts are typically handled by resetting the device.
674  *      For our virtual NIC, we will send a Disable and Enable to the
675  *      IOVM. If it doesn't respond we will trigger a serverdown.
676  */
677 static void
678 visornic_timeout_reset(struct work_struct *work)
679 {
680         struct visornic_devdata *devdata;
681         struct net_device *netdev;
682         int response = 0;
683
684         devdata = container_of(work, struct visornic_devdata, timeout_reset);
685         netdev = devdata->netdev;
686
687         rtnl_lock();
688         if (!netif_running(netdev)) {
689                 rtnl_unlock();
690                 return;
691         }
692
693         response = visornic_disable_with_timeout(netdev,
694                                                  VISORNIC_INFINITE_RSP_WAIT);
695         if (response)
696                 goto call_serverdown;
697
698         response = visornic_enable_with_timeout(netdev,
699                                                 VISORNIC_INFINITE_RSP_WAIT);
700         if (response)
701                 goto call_serverdown;
702
703         rtnl_unlock();
704
705         return;
706
707 call_serverdown:
708         visornic_serverdown(devdata, NULL);
709         rtnl_unlock();
710 }
711
712 /**
713  *      visornic_open - Enable the visornic device and mark the queue started
714  *      @netdev: netdevice to start
715  *
716  *      Enable the device and start the transmit queue.
717  *      Return 0 for success
718  */
719 static int
720 visornic_open(struct net_device *netdev)
721 {
722         visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
723
724         return 0;
725 }
726
727 /**
728  *      visornic_close - Disables the visornic device and stops the queues
729  *      @netdev: netdevice to stop
730  *
731  *      Disable the device and stop the transmit queue.
732  *      Return 0 for success
733  */
734 static int
735 visornic_close(struct net_device *netdev)
736 {
737         visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
738
739         return 0;
740 }
741
742 /**
743  *      devdata_xmits_outstanding - compute outstanding xmits
744  *      @devdata: visornic_devdata for device
745  *
746  *      Return value is the number of outstanding xmits.
747  */
748 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
749 {
750         if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
751                 return devdata->chstat.sent_xmit -
752                         devdata->chstat.got_xmit_done;
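        /* sent_xmit has wrapped past ULONG_MAX; e.g. got_xmit_done ==
         * ULONG_MAX - 1 with sent_xmit == 2 means 4 xmits are outstanding.
         */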
753         return (ULONG_MAX - devdata->chstat.got_xmit_done
754                 + devdata->chstat.sent_xmit + 1);
755 }
756
757 /**
758  *      vnic_hit_high_watermark
759  *      @devdata: indicates visornic device we are checking
760  *      @high_watermark: max num of unacked xmits we will tolerate,
761  *                       before we will start throttling
762  *
763  *      Returns true iff the number of unacked xmits sent to
764  *      the IO partition is >= high_watermark.
765  */
766 static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
767                                            ulong high_watermark)
768 {
769         return (devdata_xmits_outstanding(devdata) >= high_watermark);
770 }
771
772 /**
773  *      vnic_hit_low_watermark
774  *      @devdata: indicates visornic device we are checking
775  *      @low_watermark: we will wait until the num of unacked xmits
776  *                      drops to this value or lower before we start
777  *                      transmitting again
778  *
779  *      Returns true iff the number of unacked xmits sent to
780  *      the IO partition is <= low_watermark.
781  */
782 static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
783                                           ulong low_watermark)
784 {
785         return (devdata_xmits_outstanding(devdata) <= low_watermark);
786 }
787
788 /**
789  *      visornic_xmit - send a packet to the IO Partition
790  *      @skb: Packet to be sent
791  *      @netdev: net device the packet is being sent from
792  *
793  *      Convert the skb to a cmdrsp so the IO Partition can understand it.
794  *      Send the XMIT command to the IO Partition for processing. This
795  *      function is protected from concurrent calls by a spinlock xmit_lock
796  *      in the net_device struct, but as soon as the function returns it
797  *      can be called again.
798  *      Returns NETDEV_TX_OK.
799  */
800 static int
801 visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
802 {
803         struct visornic_devdata *devdata;
804         int len, firstfraglen, padlen;
805         struct uiscmdrsp *cmdrsp = NULL;
806         unsigned long flags;
807
808         devdata = netdev_priv(netdev);
809         spin_lock_irqsave(&devdata->priv_lock, flags);
810
811         if (netif_queue_stopped(netdev) || devdata->server_down ||
812             devdata->server_change_state) {
813                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
814                 devdata->busy_cnt++;
815                 dev_dbg(&netdev->dev,
816                         "%s busy - queue stopped\n", __func__);
817                 kfree_skb(skb);
818                 return NETDEV_TX_OK;
819         }
820
821         /* sk_buff struct is used to host network data throughout all the
822          * linux network subsystems
823          */
824         len = skb->len;
825
826         /* skb->len is the FULL length of data (including fragmentary portion)
827          * skb->data_len is the length of the fragment portion in frags
828          * skb->len - skb->data_len is size of the 1st fragment in skb->data
829          * calculate the length of the first fragment that skb->data is
830          * pointing to
831          */
832         firstfraglen = skb->len - skb->data_len;
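        /* the linear area must hold at least the ethernet header (assumed here
         * to be the usual 14 bytes), since that header is copied into the
         * cmdrsp farther down
         */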
833         if (firstfraglen < ETH_HEADER_SIZE) {
834                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
835                 devdata->busy_cnt++;
836                 dev_err(&netdev->dev,
837                         "%s busy - first frag too small (%d)\n",
838                         __func__, firstfraglen);
839                 kfree_skb(skb);
840                 return NETDEV_TX_OK;
841         }
842
843         if ((len < ETH_MIN_PACKET_SIZE) &&
844             ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
845                 /* pad the packet out to minimum size */
846                 padlen = ETH_MIN_PACKET_SIZE - len;
847                 memset(&skb->data[len], 0, padlen);
848                 skb->tail += padlen;
849                 skb->len += padlen;
850                 len += padlen;
851                 firstfraglen += padlen;
852         }
853
854         cmdrsp = devdata->xmit_cmdrsp;
855         /* clear cmdrsp */
856         memset(cmdrsp, 0, SIZEOF_CMDRSP);
857         cmdrsp->net.type = NET_XMIT;
858         cmdrsp->cmdtype = CMD_NET_TYPE;
859
860         /* save the pointer to skb -- we'll need it for completion */
861         cmdrsp->net.buf = skb;
862
863         if (vnic_hit_high_watermark(devdata,
864                                     devdata->max_outstanding_net_xmits)) {
865                 /* extra NET_XMITs queued over to IOVM - need to wait */
866                 devdata->chstat.reject_count++;
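                /* remember that we hit the queue-full condition; the flag is
                 * cleared farther down once xmits are flowing again
                 */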
867                 if (!devdata->queuefullmsg_logged &&
868                     ((devdata->chstat.reject_count & 0x3ff) == 1))
869                         devdata->queuefullmsg_logged = 1;
870                 netif_stop_queue(netdev);
871                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
872                 devdata->busy_cnt++;
873                 dev_dbg(&netdev->dev,
874                         "%s busy - waiting for iovm to catch up\n",
875                         __func__);
876                 kfree_skb(skb);
877                 return NETDEV_TX_OK;
878         }
879         if (devdata->queuefullmsg_logged)
880                 devdata->queuefullmsg_logged = 0;
881
882         if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
883                 cmdrsp->net.xmt.lincsum.valid = 1;
884                 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
885                 if (skb_transport_header(skb) > skb->data) {
886                         cmdrsp->net.xmt.lincsum.hrawoff =
887                                 skb_transport_header(skb) - skb->data;
888                         cmdrsp->net.xmt.lincsum.hrawoffv = 1;
889                 }
890                 if (skb_network_header(skb) > skb->data) {
891                         cmdrsp->net.xmt.lincsum.nhrawoff =
892                                 skb_network_header(skb) - skb->data;
893                         cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
894                 }
895                 cmdrsp->net.xmt.lincsum.csum = skb->csum;
896         } else {
897                 cmdrsp->net.xmt.lincsum.valid = 0;
898         }
899
900         /* save off the length of the entire data packet */
901         cmdrsp->net.xmt.len = len;
902
903         /* copy ethernet header from first frag into cmdrsp
904          * - everything else will be passed in frags & DMA'ed
905          */
906         memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE);
907         /* copy frags info - from skb->data we need to only provide access
908          * beyond eth header
909          */
910         cmdrsp->net.xmt.num_frags =
911                 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
912                                               MAX_PHYS_INFO,
913                                               cmdrsp->net.xmt.frags);
914         if (cmdrsp->net.xmt.num_frags < 0) {
915                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
916                 devdata->busy_cnt++;
917                 dev_err(&netdev->dev,
918                         "%s busy - copy frags failed\n", __func__);
919                 kfree_skb(skb);
920                 return NETDEV_TX_OK;
921         }
922
923         if (!visorchannel_signalinsert(devdata->dev->visorchannel,
924                                        IOCHAN_TO_IOPART, cmdrsp)) {
925                 netif_stop_queue(netdev);
926                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
927                 devdata->busy_cnt++;
928                 dev_dbg(&netdev->dev,
929                         "%s busy - signalinsert failed\n", __func__);
930                 kfree_skb(skb);
931                 return NETDEV_TX_OK;
932         }
933
934         /* Track the skbs that have been sent to the IOVM for XMIT */
935         skb_queue_head(&devdata->xmitbufhead, skb);
936
937         /* update xmt stats */
938         devdata->net_stats.tx_packets++;
939         devdata->net_stats.tx_bytes += skb->len;
940         devdata->chstat.sent_xmit++;
941
942         /* check if we have hit the high watermark for netif_stop_queue() */
943         if (vnic_hit_high_watermark(devdata,
944                                     devdata->upper_threshold_net_xmits)) {
945                 /* extra NET_XMITs queued over to IOVM - need to wait */
946                 /* stop queue - call netif_wake_queue() after lower threshold */
947                 netif_stop_queue(netdev);
948                 dev_dbg(&netdev->dev,
949                         "%s busy - invoking iovm flow control\n",
950                         __func__);
951                 devdata->flow_control_upper_hits++;
952         }
953         spin_unlock_irqrestore(&devdata->priv_lock, flags);
954
955         /* skb will be freed when we get back NET_XMIT_DONE */
956         return NETDEV_TX_OK;
957 }
958
959 /**
960  *      visornic_get_stats - returns net_stats of the visornic device
961  *      @netdev: netdevice
962  *
963  *      Returns the net_device_stats for the device
964  */
965 static struct net_device_stats *
966 visornic_get_stats(struct net_device *netdev)
967 {
968         struct visornic_devdata *devdata = netdev_priv(netdev);
969
970         return &devdata->net_stats;
971 }
972
973 /**
974  *      visornic_change_mtu - changes mtu of device.
975  *      @netdev: netdevice
976  *      @new_mtu: value of new mtu
977  *
978  *      MTU cannot be changed by system, must be changed via
979  *      CONTROLVM message. All vnics and pnics in a switch have
980  *      to have the same MTU for everything to work.
981  *      Currently not supported.
982  *      Returns -EINVAL
983  */
984 static int
985 visornic_change_mtu(struct net_device *netdev, int new_mtu)
986 {
987         return -EINVAL;
988 }
989
990 /**
991  *      visornic_set_multi - set the multicast/rx mode of the device.
992  *      @netdev: netdevice
993  *
994  *      Only flag we support currently is IFF_PROMISC
995  *      Returns void
996  */
997 static void
998 visornic_set_multi(struct net_device *netdev)
999 {
1000         struct uiscmdrsp *cmdrsp;
1001         struct visornic_devdata *devdata = netdev_priv(netdev);
1002
1003         if (devdata->old_flags == netdev->flags)
1004                 return;
1005
1006         if ((netdev->flags & IFF_PROMISC) ==
1007             (devdata->old_flags & IFF_PROMISC))
1008                 goto out_save_flags;
1009
1010         cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1011         if (!cmdrsp)
1012                 return;
1013         cmdrsp->cmdtype = CMD_NET_TYPE;
1014         cmdrsp->net.type = NET_RCV_PROMISC;
1015         cmdrsp->net.enbdis.context = netdev;
1016         cmdrsp->net.enbdis.enable =
1017                 netdev->flags & IFF_PROMISC;
1018         visorchannel_signalinsert(devdata->dev->visorchannel,
1019                                   IOCHAN_TO_IOPART,
1020                                   cmdrsp);
1021         kfree(cmdrsp);
1022
1023 out_save_flags:
1024         devdata->old_flags = netdev->flags;
1025 }
1026
1027 /**
1028  *      visornic_xmit_timeout - request to timeout the xmit
1029  *      @netdev: netdevice whose transmit has timed out
1030  *
1031  *      Queue the reset work and return. Make sure we have not already
1032  *      been informed that the IO Partition is gone; if it is gone
1033  *      we will have already timed out the xmits.
1034  */
1035 static void
1036 visornic_xmit_timeout(struct net_device *netdev)
1037 {
1038         struct visornic_devdata *devdata = netdev_priv(netdev);
1039         unsigned long flags;
1040
1041         spin_lock_irqsave(&devdata->priv_lock, flags);
1042         if (devdata->going_away) {
1043                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1044                 dev_dbg(&devdata->dev->device,
1045                         "%s aborting because device removal pending\n",
1046                         __func__);
1047                 return;
1048         }
1049
1050         /* Ensure that a ServerDown message hasn't been received */
1051         if (!devdata->enabled ||
1052             (devdata->server_down && !devdata->server_change_state)) {
1053                 dev_dbg(&netdev->dev, "%s no processing\n",
1054                         __func__);
1055                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1056                 return;
1057         }
1058         schedule_work(&devdata->timeout_reset);
1059         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1060 }
1061
1062 /**
1063  *      repost_return   - repost rcv bufs that have come back
1064  *      @cmdrsp: io channel command struct to post
1065  *      @devdata: visornic devdata for the device
1066  *      @skb: skb
1067  *      @netdev: netdevice
1068  *
1069  *      Repost rcv buffers that have been returned to us when
1070  *      we are finished with them.
1071  *      Returns 0 for success, negative errno on error.
1072  */
1073 static inline int
1074 repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1075               struct sk_buff *skb, struct net_device *netdev)
1076 {
1077         struct net_pkt_rcv copy;
1078         int i = 0, cc, numreposted;
1079         int found_skb = 0;
1080         int status = 0;
1081
1082         copy = cmdrsp->net.rcv;
1083         switch (copy.numrcvbufs) {
1084         case 0:
1085                 devdata->n_rcv0++;
1086                 break;
1087         case 1:
1088                 devdata->n_rcv1++;
1089                 break;
1090         case 2:
1091                 devdata->n_rcv2++;
1092                 break;
1093         default:
1094                 devdata->n_rcvx++;
1095                 break;
1096         }
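        /* for each rcv buffer the IO Partition handed back, find its slot in
         * rcvbuf[], replace it with a freshly allocated skb, and post the new
         * buffer to the IO Partition
         */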
1097         for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1098                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1099                         if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1100                                 continue;
1101
1102                         if ((skb) && devdata->rcvbuf[i] == skb) {
1103                                 devdata->found_repost_rcvbuf_cnt++;
1104                                 found_skb = 1;
1105                                 devdata->repost_found_skb_cnt++;
1106                         }
1107                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1108                         if (!devdata->rcvbuf[i]) {
1109                                 devdata->num_rcv_bufs_could_not_alloc++;
1110                                 devdata->alloc_failed_in_repost_rtn_cnt++;
1111                                 status = -ENOMEM;
1112                                 break;
1113                         }
1114                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1115                         numreposted++;
1116                         break;
1117                 }
1118         }
1119         if (numreposted != copy.numrcvbufs) {
1120                 devdata->n_repost_deficit++;
1121                 status = -EINVAL;
1122         }
1123         if (skb) {
1124                 if (found_skb) {
1125                         kfree_skb(skb);
1126                 } else {
1127                         status = -EINVAL;
1128                         devdata->bad_rcv_buf++;
1129                 }
1130         }
1131         return status;
1132 }
1133
1134 /**
1135  *      visornic_rx - Handle receive packets coming back from IO Part
1136  *      @cmdrsp: Receive packet returned from IO Part
1137  *
1138  *      Got a receive packet back from the IO Part, handle it and send
1139  *      it up the stack.
1140  *      Returns 1 iff an skb was received, otherwise 0
1141  */
1142 static int
1143 visornic_rx(struct uiscmdrsp *cmdrsp)
1144 {
1145         struct visornic_devdata *devdata;
1146         struct sk_buff *skb, *prev, *curr;
1147         struct net_device *netdev;
1148         int cc, currsize, off;
1149         struct ethhdr *eth;
1150         unsigned long flags;
1151
1152         /* post a new rcv buf to the other end using the cmdrsp at hand;
1153          * post it without holding the lock, using the signal lock to
1154          * synchronize the queue insert. The cmdrsp containing net.rcv is
1155          * the one we reuse to repost, so copy the info we need from it.
1156          */
1157         skb = cmdrsp->net.buf;
1158         netdev = skb->dev;
1159
1160         devdata = netdev_priv(netdev);
1161
1162         spin_lock_irqsave(&devdata->priv_lock, flags);
1163         atomic_dec(&devdata->num_rcvbuf_in_iovm);
1164
1165         /* set length to how much was ACTUALLY received -
1166          * NOTE: rcv_done_len includes actual length of data rcvd
1167          * including ethhdr
1168          */
1169         skb->len = cmdrsp->net.rcv.rcv_done_len;
1170
1171         /* update rcv stats - call it with priv_lock held */
1172         devdata->net_stats.rx_packets++;
1173         devdata->net_stats.rx_bytes += skb->len;
1174
1175         /* test enabled while holding lock */
1176         if (!(devdata->enabled && devdata->enab_dis_acked)) {
1177                 /* don't process it unless we're in enable mode and until
1178                  * we've gotten an ACK saying the other end got our RCV enable
1179                  */
1180                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1181                 repost_return(cmdrsp, devdata, skb, netdev);
1182                 return 0;
1183         }
1184
1185         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1186
1187         /* when the skb was allocated, skb->dev, skb->data, skb->len and
1188          * skb->data_len were set up, AND the data has already been put into
1189          * the skb (both the first frag and the frags pages).
1190          * NOTE: firstfraglen is the amount of data in skb->data - that
1191          * which is not in nr_frags or frag_list - and is now simply
1192          * RCVPOST_BUF_SIZE. Bump tail to show how much data is in the
1193          * first frag, set data_len to show the rest, and see if we have
1194          * to chain the frag_list.
1195          */
1196         if (skb->len > RCVPOST_BUF_SIZE) {      /* do PRECAUTIONARY check */
1197                 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1198                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1199                                 dev_err(&devdata->netdev->dev,
1200                                         "repost_return failed");
1201                         return 0;
1202                 }
1203                 /* length rcvd is greater than firstfrag in this skb rcv buf  */
1204                 skb->tail += RCVPOST_BUF_SIZE;  /* amount in skb->data */
1205                 skb->data_len = skb->len - RCVPOST_BUF_SIZE;    /* amount that
1206                                                                  *  will be in
1207                                                                  * frag_list
1208                                                                  */
1209         } else {
1210                 /* data fits in this skb - no chaining - do
1211                  * PRECAUTIONARY check
1212                  */
1213                 if (cmdrsp->net.rcv.numrcvbufs != 1) {  /* should be 1 */
1214                         if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1215                                 dev_err(&devdata->netdev->dev,
1216                                         "repost_return failed");
1217                         return 0;
1218                 }
1219                 skb->tail += skb->len;
1220                 skb->data_len = 0;      /* nothing rcvd in frag_list */
1221         }
1222         off = skb_tail_pointer(skb) - skb->data;
1223
1224         /* off is the amount we bumped tail by in the head skb; it is used
1225          * below to calculate the size of each chained skb.
1226          * If necessary, chain the rcv skbs together.
1227          * NOTE: rcvbuf[0] is the same as the head skb; we need to chain
1228          * the rest to that one.
1229          * - do PRECAUTIONARY check
1232          */
1233         if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1234                 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1235                         dev_err(&devdata->netdev->dev, "repost_return failed");
1236                 return 0;
1237         }
1238
1239         if (cmdrsp->net.rcv.numrcvbufs > 1) {
1240                 /* chain the various rcv buffers into the skb's frag_list. */
1241                 /* Note: off was initialized above  */
1242                 for (cc = 1, prev = NULL;
1243                      cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1244                         curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1245                         curr->next = NULL;
1246                         if (!prev)      /* start of list- set head */
1247                                 skb_shinfo(skb)->frag_list = curr;
1248                         else
1249                                 prev->next = curr;
1250                         prev = curr;
1251
1252                         /* should we set skb->len and skb->data_len for each
1253                          * buffer being chained??? can't hurt!
1254                          */
1255                         currsize = min(skb->len - off,
1256                                        (unsigned int)RCVPOST_BUF_SIZE);
1257                         curr->len = currsize;
1258                         curr->tail += currsize;
1259                         curr->data_len = 0;
1260                         off += currsize;
1261                 }
1262                 /* assert skb->len == off */
1263                 if (skb->len != off) {
1264                         netdev_err(devdata->netdev,
1265                                    "something wrong; skb->len:%d != off:%d\n",
1266                                    skb->len, off);
1267                 }
1268         }
1269
1270         /* set up the packet's protocol type using the ethernet header - this
1271          * sets up skb->pkt_type and it also PULLS out the eth header
1272          */
1273         skb->protocol = eth_type_trans(skb, netdev);
1274
1275         eth = eth_hdr(skb);
1276
1277         skb->csum = 0;
1278         skb->ip_summed = CHECKSUM_NONE;
1279
1280         do {
1281                 if (netdev->flags & IFF_PROMISC)
1282                         break;  /* accept all packets */
1283                 if (skb->pkt_type == PACKET_BROADCAST) {
1284                         if (netdev->flags & IFF_BROADCAST)
1285                                 break;  /* accept all broadcast packets */
1286                 } else if (skb->pkt_type == PACKET_MULTICAST) {
1287                         if ((netdev->flags & IFF_MULTICAST) &&
1288                             (netdev_mc_count(netdev))) {
1289                                 struct netdev_hw_addr *ha;
1290                                 int found_mc = 0;
1291
1292                                 /* only accept multicast packets that we can
1293                                  * find in our multicast address list
1294                                  */
1295                                 netdev_for_each_mc_addr(ha, netdev) {
1296                                         if (ether_addr_equal(eth->h_dest,
1297                                                              ha->addr)) {
1298                                                 found_mc = 1;
1299                                                 break;
1300                                         }
1301                                 }
1302                                 /* accept pkt, dest matches a multicast addr */
1303                                 if (found_mc)
1304                                         break;
1305                         }
1306                 /* accept packet, h_dest must match vnic  mac address */
1307                 } else if (skb->pkt_type == PACKET_HOST) {
1308                         break;
1309                 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1310                         /* something is not right */
1311                         dev_err(&devdata->netdev->dev,
1312                                 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1313                                 netdev->name, eth->h_dest, netdev->dev_addr);
1314                 }
1315                 /* drop packet - don't forward it up to OS */
1316                 devdata->n_rcv_packets_not_accepted++;
1317                 repost_return(cmdrsp, devdata, skb, netdev);
1318                 return 0;
1319         } while (0);
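        /* reaching this point means one of the breaks above accepted the
         * packet; rejected packets were reposted and we already returned
         */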
1320
1321         netif_receive_skb(skb);
1322         /* netif_receive_skb returns various values, but in practice most
1323          * drivers ignore the return value
1324          */
1325
1326         skb = NULL;
1327         /*
1328          * whether the packet got dropped or handled, the skb is freed by
1329          * kernel code, so we shouldn't free it. but we should repost a
1330          * new rcv buffer.
1331          */
1332         repost_return(cmdrsp, devdata, skb, netdev);
1333         return 1;
1334 }
1335
1336 /**
1337  *      devdata_initialize      - Initialize devdata structure
1338  *      @devdata: visornic_devdata structure to initialize
1339  *      @dev: visor_device it belongs to
1340  *
1341  *      Setup initial values for the visornic based on channel and default
1342  *      values.
1343  *      Returns a pointer to the devdata structure
1344  */
1345 static struct visornic_devdata *
1346 devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
1347 {
1348         devdata->dev = dev;
1349         devdata->incarnation_id = get_jiffies_64();
1350         return devdata;
1351 }
1352
1353 /**
1354  *      devdata_release - Frees up references in devdata
1355  *      @devdata: struct to clean up
1356  *
1357  *      Frees up references in devdata.
1358  *      Returns void
1359  */
1360 static void devdata_release(struct visornic_devdata *devdata)
1361 {
1362         kfree(devdata->rcvbuf);
1363         kfree(devdata->cmdrsp_rcv);
1364         kfree(devdata->xmit_cmdrsp);
1365 }
1366
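/* net_device_ops callbacks wired up for every visornic netdev */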
1367 static const struct net_device_ops visornic_dev_ops = {
1368         .ndo_open = visornic_open,
1369         .ndo_stop = visornic_close,
1370         .ndo_start_xmit = visornic_xmit,
1371         .ndo_get_stats = visornic_get_stats,
1372         .ndo_change_mtu = visornic_change_mtu,
1373         .ndo_tx_timeout = visornic_xmit_timeout,
1374         .ndo_set_rx_mode = visornic_set_multi,
1375 };
1376
1377 /* DebugFS code */
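/**
 *      info_debugfs_read       - read handler for the "info" debugfs entry
 *      @file: debugfs file
 *      @buf: userspace buffer to copy the output into
 *      @len: size of the userspace buffer
 *      @offset: offset into the output
 *
 *      Dumps channel and flow-control statistics for each visornic netdev
 *      whose xmit queue is currently stopped.
 *      Returns the number of bytes copied to userspace.
 */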
1378 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1379                                  size_t len, loff_t *offset)
1380 {
1381         ssize_t bytes_read = 0;
1382         int str_pos = 0;
1383         struct visornic_devdata *devdata;
1384         struct net_device *dev;
1385         char *vbuf;
1386
1387         if (len > MAX_BUF)
1388                 len = MAX_BUF;
1389         vbuf = kzalloc(len, GFP_KERNEL);
1390         if (!vbuf)
1391                 return -ENOMEM;
1392
1393         /* for each vnic channel dump out channel specific data */
1394         rcu_read_lock();
1395         for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1396                 /* Only consider visornic netdevs whose xmit queue is stopped */
1397                 if ((dev->netdev_ops != &visornic_dev_ops) ||
1398                     (!netif_queue_stopped(dev)))
1399                         continue;
1400
1401                 devdata = netdev_priv(dev);
1402                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1403                                      "netdev = %s (0x%p), MAC Addr %pM\n",
1404                                      dev->name,
1405                                      dev,
1406                                      dev->dev_addr);
1407                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1408                                      "VisorNic Dev Info = 0x%p\n", devdata);
1409                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1410                                      " num_rcv_bufs = %d\n",
1411                                      devdata->num_rcv_bufs);
1412                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1413                                      " max_outstanding_net_xmits = %lu\n",
1414                                      devdata->max_outstanding_net_xmits);
1415                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1416                                      " upper_threshold_net_xmits = %lu\n",
1417                                      devdata->upper_threshold_net_xmits);
1418                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1419                                      " lower_threshold_net_xmits = %lu\n",
1420                                      devdata->lower_threshold_net_xmits);
1421                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1422                                      " queuefullmsg_logged = %d\n",
1423                                      devdata->queuefullmsg_logged);
1424                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1425                                      " chstat.got_rcv = %lu\n",
1426                                      devdata->chstat.got_rcv);
1427                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1428                                      " chstat.got_enbdisack = %lu\n",
1429                                      devdata->chstat.got_enbdisack);
1430                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1431                                      " chstat.got_xmit_done = %lu\n",
1432                                      devdata->chstat.got_xmit_done);
1433                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1434                                      " chstat.xmit_fail = %lu\n",
1435                                      devdata->chstat.xmit_fail);
1436                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1437                                      " chstat.sent_enbdis = %lu\n",
1438                                      devdata->chstat.sent_enbdis);
1439                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1440                                      " chstat.sent_promisc = %lu\n",
1441                                      devdata->chstat.sent_promisc);
1442                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1443                                      " chstat.sent_post = %lu\n",
1444                                      devdata->chstat.sent_post);
1445                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1446                                      " chstat.sent_post_failed = %lu\n",
1447                                      devdata->chstat.sent_post_failed);
1448                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1449                                      " chstat.sent_xmit = %lu\n",
1450                                      devdata->chstat.sent_xmit);
1451                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1452                                      " chstat.reject_count = %lu\n",
1453                                      devdata->chstat.reject_count);
1454                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1455                                      " chstat.extra_rcvbufs_sent = %lu\n",
1456                                      devdata->chstat.extra_rcvbufs_sent);
1457                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1458                                      " n_rcv0 = %lu\n", devdata->n_rcv0);
1459                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1460                                      " n_rcv1 = %lu\n", devdata->n_rcv1);
1461                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1462                                      " n_rcv2 = %lu\n", devdata->n_rcv2);
1463                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1464                                      " n_rcvx = %lu\n", devdata->n_rcvx);
1465                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466                                      " num_rcvbuf_in_iovm = %d\n",
1467                                      atomic_read(&devdata->num_rcvbuf_in_iovm));
1468                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469                                      " alloc_failed_in_if_needed_cnt = %lu\n",
1470                                      devdata->alloc_failed_in_if_needed_cnt);
1471                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472                                      " alloc_failed_in_repost_rtn_cnt = %lu\n",
1473                                      devdata->alloc_failed_in_repost_rtn_cnt);
1474                 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475                  *                   " inner_loop_limit_reached_cnt = %lu\n",
1476                  *                   devdata->inner_loop_limit_reached_cnt);
1477                  */
1478                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1479                                      " found_repost_rcvbuf_cnt = %lu\n",
1480                                      devdata->found_repost_rcvbuf_cnt);
1481                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1482                                      " repost_found_skb_cnt = %lu\n",
1483                                      devdata->repost_found_skb_cnt);
1484                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1485                                      " n_repost_deficit = %lu\n",
1486                                      devdata->n_repost_deficit);
1487                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1488                                      " bad_rcv_buf = %lu\n",
1489                                      devdata->bad_rcv_buf);
1490                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1491                                      " n_rcv_packets_not_accepted = %lu\n",
1492                                      devdata->n_rcv_packets_not_accepted);
1493                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1494                                      " interrupts_rcvd = %llu\n",
1495                                      devdata->interrupts_rcvd);
1496                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1497                                      " interrupts_notme = %llu\n",
1498                                      devdata->interrupts_notme);
1499                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1500                                      " interrupts_disabled = %llu\n",
1501                                      devdata->interrupts_disabled);
1502                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1503                                      " busy_cnt = %llu\n",
1504                                      devdata->busy_cnt);
1505                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1506                                      " flow_control_upper_hits = %llu\n",
1507                                      devdata->flow_control_upper_hits);
1508                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509                                      " flow_control_lower_hits = %llu\n",
1510                                      devdata->flow_control_lower_hits);
1511                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1512                                      " netif_queue = %s\n",
1513                                      netif_queue_stopped(devdata->netdev) ?
1514                                      "stopped" : "running");
1515                 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516                                      " xmits_outstanding = %lu\n",
1517                                      devdata_xmits_outstanding(devdata));
1518         }
1519         rcu_read_unlock();
1520         bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1521         kfree(vbuf);
1522         return bytes_read;
1523 }
1524
1525 /**
1526  *      send_rcv_posts_if_needed - send receive buffers to the IO Partition
1527  *      @devdata: visornic device
1528  *
1529  *      Send receive buffers to the IO Partition.
1530  *      Returns void
1531  */
1532 static void
1533 send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1534 {
1535         int i;
1536         struct net_device *netdev;
1537         struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1538         int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1539
1540         /* don't do this until vnic is marked ready */
1541         if (!(devdata->enabled && devdata->enab_dis_acked))
1542                 return;
1543
1544         netdev = devdata->netdev;
1545         rcv_bufs_allocated = 0;
1546         /* this code is trying to prevent getting stuck here forever,
1547          * but still retries if it can't allocate them all this time.
1548          */
1549         cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1550         while (cur_num_rcv_bufs_to_alloc > 0) {
1551                 cur_num_rcv_bufs_to_alloc--;
1552                 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1553                         if (devdata->rcvbuf[i])
1554                                 continue;
1555                         devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1556                         if (!devdata->rcvbuf[i]) {
1557                                 devdata->alloc_failed_in_if_needed_cnt++;
1558                                 break;
1559                         }
1560                         rcv_bufs_allocated++;
1561                         post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1562                         devdata->chstat.extra_rcvbufs_sent++;
1563                 }
1564         }
1565         devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1566 }
1567
1568 /**
1569  *      drain_resp_queue  - drains and ignores all messages from the resp queue
1570  *      @cmdrsp: io channel command response message
1571  *      @devdata: visornic device to drain
1572  */
1573 static void
1574 drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
1575 {
1576         while (visorchannel_signalremove(devdata->dev->visorchannel,
1577                                          IOCHAN_FROM_IOPART,
1578                                          cmdrsp))
1579                 ;
1580 }
1581
1582 /**
1583  *      service_resp_queue      - drains the response queue
1584  *      @cmdrsp: io channel command response message
1585  *      @devdata: visornic device to drain
1586  *
1587  *      Drain the response queue of any responses from the IO partition.
1588  *      Process the responses as we get them.
1589  *      Returns when the response queue is empty or the budget is consumed.
1590  */
1591 static void
1592 service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1593                    int *rx_work_done, int budget)
1594 {
1595         unsigned long flags;
1596         struct net_device *netdev;
1597
1598         while (*rx_work_done < budget) {
1599                 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1600                  * moment
1601                  */
1602                 if (!visorchannel_signalremove(devdata->dev->visorchannel,
1603                                                IOCHAN_FROM_IOPART,
1604                                                cmdrsp))
1605                         break; /* queue empty */
1606
1607                 switch (cmdrsp->net.type) {
1608                 case NET_RCV:
1609                         devdata->chstat.got_rcv++;
1610                         /* process incoming packet */
1611                         *rx_work_done += visornic_rx(cmdrsp);
1612                         break;
1613                 case NET_XMIT_DONE:
1614                         spin_lock_irqsave(&devdata->priv_lock, flags);
1615                         devdata->chstat.got_xmit_done++;
1616                         if (cmdrsp->net.xmtdone.xmt_done_result)
1617                                 devdata->chstat.xmit_fail++;
1618                         /* only call queue wake if we stopped it */
1619                         netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1620                         /* ASSERT netdev == vnicinfo->netdev; */
1621                         if ((netdev == devdata->netdev) &&
1622                             netif_queue_stopped(netdev)) {
1623                                 /* check if we have crossed the lower watermark
1624                                  * for netif_wake_queue()
1625                                  */
1626                                 if (vnic_hit_low_watermark
1627                                     (devdata,
1628                                      devdata->lower_threshold_net_xmits)) {
1629                                         /* enough NET_XMITs completed
1630                                          * so can restart netif queue
1631                                          */
1632                                         netif_wake_queue(netdev);
1633                                         devdata->flow_control_lower_hits++;
1634                                 }
1635                         }
1636                         skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1637                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1638                         kfree_skb(cmdrsp->net.buf);
1639                         break;
1640                 case NET_RCV_ENBDIS_ACK:
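                        /* IO partition acknowledged an earlier
                         * enable/disable request
                         */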
1641                         devdata->chstat.got_enbdisack++;
1642                         netdev = (struct net_device *)
1643                                  cmdrsp->net.enbdis.context;
1644                         spin_lock_irqsave(&devdata->priv_lock, flags);
1645                         devdata->enab_dis_acked = 1;
1646                         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1647
1648                         if (devdata->server_down &&
1649                             devdata->server_change_state) {
1650                                 /* Inform Linux that the link is up */
1651                                 devdata->server_down = false;
1652                                 devdata->server_change_state = false;
1653                                 netif_wake_queue(netdev);
1654                                 netif_carrier_on(netdev);
1655                         }
1656                         break;
1657                 case NET_CONNECT_STATUS:
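                        /* IO partition is reporting link state; bring the
                         * interface up or down to match
                         */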
1658                         netdev = devdata->netdev;
1659                         if (cmdrsp->net.enbdis.enable == 1) {
1660                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1661                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1662                                 spin_unlock_irqrestore(&devdata->priv_lock,
1663                                                        flags);
1664                                 netif_wake_queue(netdev);
1665                                 netif_carrier_on(netdev);
1666                         } else {
1667                                 netif_stop_queue(netdev);
1668                                 netif_carrier_off(netdev);
1669                                 spin_lock_irqsave(&devdata->priv_lock, flags);
1670                                 devdata->enabled = cmdrsp->net.enbdis.enable;
1671                                 spin_unlock_irqrestore(&devdata->priv_lock,
1672                                                        flags);
1673                         }
1674                         break;
1675                 default:
1676                         break;
1677                 }
1678                 /* cmdrsp is now available for reuse  */
1679         }
1680 }
1681
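/**
 *      visornic_poll   - NAPI poll routine
 *      @napi: napi structure embedded in the visornic devdata
 *      @budget: maximum number of rcv packets to process in this pass
 *
 *      Reposts rcv buffers to the IO Partition if needed, then services
 *      the response queue. Completes NAPI when less than the full budget
 *      was consumed.
 *      Returns the number of rcv packets processed.
 */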
1682 static int visornic_poll(struct napi_struct *napi, int budget)
1683 {
1684         struct visornic_devdata *devdata = container_of(napi,
1685                                                         struct visornic_devdata,
1686                                                         napi);
1687         int rx_count = 0;
1688
1689         send_rcv_posts_if_needed(devdata);
1690         service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1691
1692         /* If there aren't any more packets to receive stop the poll */
1693         if (rx_count < budget)
1694                 napi_complete(napi);
1695
1696         return rx_count;
1697 }
1698
1699 /**
1700  *      poll_for_irq    - Checks the status of the response queue.
1701  *      @v: void pointer to the visornic devdata
1702  *
1703  *      Timer callback that periodically checks the response queue and
1704  *      schedules the napi poll routine when work is pending.
1705  *      Rearms itself before returning.
1706  */
1707 static void
1708 poll_for_irq(unsigned long v)
1709 {
1710         struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1711
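        /* if the IO partition has posted responses, kick off napi */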
1712         if (!visorchannel_signalempty(
1713                                    devdata->dev->visorchannel,
1714                                    IOCHAN_FROM_IOPART))
1715                 napi_schedule(&devdata->napi);
1716
1717         atomic_set(&devdata->interrupt_rcvd, 0);
1718
1719         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1720 }
1721
1722 /**
1723  *      visornic_probe  - probe function for visornic devices
1724  *      @dev: The visor device discovered
1725  *
1726  *      Called when visorbus discovers a visornic device on its
1727  *      bus. It creates a new visornic ethernet adapter.
1728  *      Returns 0 or negative for error.
1729  */
1730 static int visornic_probe(struct visor_device *dev)
1731 {
1732         struct visornic_devdata *devdata = NULL;
1733         struct net_device *netdev = NULL;
1734         int err;
1735         int channel_offset = 0;
1736         u64 features;
1737
1738         netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1739         if (!netdev) {
1740                 dev_err(&dev->device,
1741                         "%s alloc_etherdev failed\n", __func__);
1742                 return -ENOMEM;
1743         }
1744
1745         netdev->netdev_ops = &visornic_dev_ops;
1746         netdev->watchdog_timeo = 5 * HZ;
1747         SET_NETDEV_DEV(netdev, &dev->device);
1748
1749         /* Get MAC address from channel and read it into the device. */
1750         netdev->addr_len = ETH_ALEN;
1751         channel_offset = offsetof(struct spar_io_channel_protocol,
1752                                   vnic.macaddr);
1753         err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1754                                     ETH_ALEN);
1755         if (err < 0) {
1756                 dev_err(&dev->device,
1757                         "%s failed to get mac addr from chan (%d)\n",
1758                         __func__, err);
1759                 goto cleanup_netdev;
1760         }
1761
1762         devdata = devdata_initialize(netdev_priv(netdev), dev);
1763         if (!devdata) {
1764                 dev_err(&dev->device,
1765                         "%s devdata_initialize failed\n", __func__);
1766                 err = -ENOMEM;
1767                 goto cleanup_netdev;
1768         }
1769         /* don't trust messages lying around in the channel */
1770         drain_resp_queue(devdata->cmdrsp, devdata);
1771
1772         devdata->netdev = netdev;
1773         dev_set_drvdata(&dev->device, devdata);
1774         init_waitqueue_head(&devdata->rsp_queue);
1775         spin_lock_init(&devdata->priv_lock);
1776         devdata->enabled = 0; /* not yet */
1777         atomic_set(&devdata->usage, 1);
1778
1779         /* Setup rcv bufs */
1780         channel_offset = offsetof(struct spar_io_channel_protocol,
1781                                   vnic.num_rcv_bufs);
1782         err = visorbus_read_channel(dev, channel_offset,
1783                                     &devdata->num_rcv_bufs, 4);
1784         if (err) {
1785                 dev_err(&dev->device,
1786                         "%s failed to get #rcv bufs from chan (%d)\n",
1787                         __func__, err);
1788                 goto cleanup_netdev;
1789         }
1790
1791         devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1792                                   sizeof(struct sk_buff *), GFP_KERNEL);
1793         if (!devdata->rcvbuf) {
1794                 err = -ENOMEM;
1795                 goto cleanup_netdev;
1796         }
1797
1798         /* Set the net_xmit outstanding threshold: always leave two slots
1799          * open, but have at least 3; max_outstanding_net_xmits must be > 0.
1800          */
1801         devdata->max_outstanding_net_xmits =
1802                 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1803         devdata->upper_threshold_net_xmits =
1804                 max_t(unsigned long,
1805                       2, (devdata->max_outstanding_net_xmits - 1));
1806         devdata->lower_threshold_net_xmits =
1807                 max_t(unsigned long,
1808                       1, (devdata->max_outstanding_net_xmits / 2));
1809
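        /* skbs handed to the IO Partition sit here until NET_XMIT_DONE */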
1810         skb_queue_head_init(&devdata->xmitbufhead);
1811
1812         /* create a cmdrsp we can use to post and unpost rcv buffers */
1813         devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1814         if (!devdata->cmdrsp_rcv) {
1815                 err = -ENOMEM;
1816                 goto cleanup_rcvbuf;
1817         }
1818         devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1819         if (!devdata->xmit_cmdrsp) {
1820                 err = -ENOMEM;
1821                 goto cleanup_cmdrsp_rcv;
1822         }
1823         INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1824         devdata->server_down = false;
1825         devdata->server_change_state = false;
1826
1827         /* set the default mtu */
1828         channel_offset = offsetof(struct spar_io_channel_protocol,
1829                                   vnic.mtu);
1830         err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1831         if (err) {
1832                 dev_err(&dev->device,
1833                         "%s failed to get mtu from chan (%d)\n",
1834                         __func__, err);
1835                 goto cleanup_xmit_cmdrsp;
1836         }
1837
1838         /* TODO: Setup Interrupt information */
1839         /* Set up napi and a poll timer to get responses */
1840         netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1841
1842         setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1843                     (unsigned long)devdata);
1844         /* Note: This timer has to start running before the device can
1845          * be enabled, because the napi routine is responsible for
1846          * setting enab_dis_acked
1847          */
1848         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
1849
1850         channel_offset = offsetof(struct spar_io_channel_protocol,
1851                                   channel_header.features);
1852         err = visorbus_read_channel(dev, channel_offset, &features, 8);
1853         if (err) {
1854                 dev_err(&dev->device,
1855                         "%s failed to get features from chan (%d)\n",
1856                         __func__, err);
1857                 goto cleanup_napi_add;
1858         }
1859
1860         features |= ULTRA_IO_CHANNEL_IS_POLLING;
1861         features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
1862         err = visorbus_write_channel(dev, channel_offset, &features, 8);
1863         if (err) {
1864                 dev_err(&dev->device,
1865                         "%s failed to set features in chan (%d)\n",
1866                         __func__, err);
1867                 goto cleanup_napi_add;
1868         }
1869
1873         /* Note: Interrupts have to be enabled before the device can
1874          * be opened, because the napi routine is responsible for
1875          * setting enab_dis_acked
1876          */
1877         visorbus_enable_channel_interrupts(dev);
1878
1879         err = register_netdev(netdev);
1880         if (err) {
1881                 dev_err(&dev->device,
1882                         "%s register_netdev failed (%d)\n", __func__, err);
1883                 goto cleanup_napi_add;
1884         }
1885
1886         /* create debugfs directories */
1887         devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1888                                                       visornic_debugfs_dir);
1889         if (!devdata->eth_debugfs_dir) {
1890                 dev_err(&dev->device,
1891                         "%s debugfs_create_dir %s failed\n",
1892                         __func__, netdev->name);
1893                 err = -ENOMEM;
1894                 goto cleanup_register_netdev;
1895         }
1896
1897         dev_info(&dev->device, "%s success netdev=%s\n",
1898                  __func__, netdev->name);
1899         return 0;
1900
1901 cleanup_register_netdev:
1902         unregister_netdev(netdev);
1903
1904 cleanup_napi_add:
1905         del_timer_sync(&devdata->irq_poll_timer);
1906         netif_napi_del(&devdata->napi);
1907
1908 cleanup_xmit_cmdrsp:
1909         kfree(devdata->xmit_cmdrsp);
1910
1911 cleanup_cmdrsp_rcv:
1912         kfree(devdata->cmdrsp_rcv);
1913
1914 cleanup_rcvbuf:
1915         kfree(devdata->rcvbuf);
1916
1917 cleanup_netdev:
1918         free_netdev(netdev);
1919         return err;
1920 }
1921
1922 /**
1923  *      host_side_disappeared   - IO part is gone.
1924  *      @devdata: device object
1925  *
1926  *      IO partition servicing this device is gone, do cleanup
1927  *      Returns void.
1928  */
1929 static void host_side_disappeared(struct visornic_devdata *devdata)
1930 {
1931         unsigned long flags;
1932
1933         spin_lock_irqsave(&devdata->priv_lock, flags);
1934         devdata->dev = NULL;   /* indicate device destroyed */
1935         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1936 }
1937
1938 /**
1939  *      visornic_remove         - Called when visornic dev goes away
1940  *      @dev: visornic device that is being removed
1941  *
1942  *      Called when DEVICE_DESTROY gets called to remove device.
1943  *      Returns void
1944  */
1945 static void visornic_remove(struct visor_device *dev)
1946 {
1947         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1948         struct net_device *netdev;
1949         unsigned long flags;
1950
1951         if (!devdata) {
1952                 dev_err(&dev->device, "%s no devdata\n", __func__);
1953                 return;
1954         }
1955         spin_lock_irqsave(&devdata->priv_lock, flags);
1956         if (devdata->going_away) {
1957                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1958                 dev_err(&dev->device, "%s already being removed\n", __func__);
1959                 return;
1960         }
1961         devdata->going_away = true;
1962         spin_unlock_irqrestore(&devdata->priv_lock, flags);
1963         netdev = devdata->netdev;
1964         if (!netdev) {
1965                 dev_err(&dev->device, "%s no net device\n", __func__);
1966                 return;
1967         }
1968
1969         /* going_away prevents new items being added to the workqueues */
1970         cancel_work_sync(&devdata->timeout_reset);
1971
1972         debugfs_remove_recursive(devdata->eth_debugfs_dir);
1973
1974         unregister_netdev(netdev);  /* this will call visornic_close() */
1975
1976         del_timer_sync(&devdata->irq_poll_timer);
1977         netif_napi_del(&devdata->napi);
1978
1979         dev_set_drvdata(&dev->device, NULL);
1980         host_side_disappeared(devdata);
1981         devdata_release(devdata);
1982         free_netdev(netdev);
1983 }
1984
1985 /**
1986  *      visornic_pause          - Called when IO Part disappears
1987  *      @dev: visornic device that is being serviced
1988  *      @complete_func: call when finished.
1989  *
1990  *      Called when the IO Partition has gone down. Need to free
1991  *      up resources and wait for IO partition to come back. Mark
1992  *      link as down and don't attempt any DMA. When we have freed
1993  *      memory call the complete_func so that Command knows we are
1994  *      done. If we don't call complete_func, IO part will never
1995  *      come back.
1996  *      Returns 0 for success.
1997  */
1998 static int visornic_pause(struct visor_device *dev,
1999                           visorbus_state_complete_func complete_func)
2000 {
2001         struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2002
2003         visornic_serverdown(devdata, complete_func);
2004         return 0;
2005 }
2006
2007 /**
2008  *      visornic_resume         - Called when IO part has recovered
2009  *      @dev: visornic device that is being serviced
2010  *      @complete_func: call when finished
2011  *
2012  *      Called when the IO partition has recovered. Reestablish
2013  *      connection to the IO part and set the link up. Okay to do
2014  *      DMA again.
2015  *      Returns 0 for success.
2016  */
2017 static int visornic_resume(struct visor_device *dev,
2018                            visorbus_state_complete_func complete_func)
2019 {
2020         struct visornic_devdata *devdata;
2021         struct net_device *netdev;
2022         unsigned long flags;
2023
2024         devdata = dev_get_drvdata(&dev->device);
2025         if (!devdata) {
2026                 dev_err(&dev->device, "%s no devdata\n", __func__);
2027                 return -EINVAL;
2028         }
2029
2030         netdev = devdata->netdev;
2031
2032         spin_lock_irqsave(&devdata->priv_lock, flags);
2033         if (devdata->server_change_state) {
2034                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2035                 dev_err(&dev->device, "%s server already changing state\n",
2036                         __func__);
2037                 return -EINVAL;
2038         }
2039         if (!devdata->server_down) {
2040                 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2041                 dev_err(&dev->device, "%s server not down\n", __func__);
2042                 complete_func(dev, 0);
2043                 return 0;
2044         }
2045         devdata->server_change_state = true;
2046         spin_unlock_irqrestore(&devdata->priv_lock, flags);
2047
2048         /* Must transition channel to ATTACHED state BEFORE
2049          * we can start using the device again.
2050          * TODO: State transitions
2051          */
2052         mod_timer(&devdata->irq_poll_timer, jiffies + msecs_to_jiffies(2));
2053
2054         init_rcv_bufs(netdev, devdata);
2055
2056         rtnl_lock();
2057         dev_open(netdev);
2058         rtnl_unlock();
2059
2060         complete_func(dev, 0);
2061         return 0;
2062 }
2063
2064 /**
2065  *      visornic_init   - Init function
2066  *
2067  *      Init function for the visornic driver. Do initial driver setup
2068  *      and wait for devices.
2069  *      Returns 0 for success, negative for error.
2070  */
2071 static int visornic_init(void)
2072 {
2073         struct dentry *ret;
2074         int err = -ENOMEM;
2075
2076         visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2077         if (!visornic_debugfs_dir)
2078                 return err;
2079
2080         ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
2081                                   &debugfs_info_fops);
2082         if (!ret)
2083                 goto cleanup_debugfs;
2084         ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
2085                                   NULL, &debugfs_enable_ints_fops);
2086         if (!ret)
2087                 goto cleanup_debugfs;
2088
2089         err = visorbus_register_visor_driver(&visornic_driver);
2090         if (err)
2091                 goto cleanup_debugfs;
2092
2093         return 0;
2094
2095 cleanup_debugfs:
2096         debugfs_remove_recursive(visornic_debugfs_dir);
2097
2098         return err;
2099 }
2100
2101 /**
2102  *      visornic_cleanup        - driver exit routine
2103  *
2104  *      Unregister driver from the bus and free up memory.
2105  */
2106 static void visornic_cleanup(void)
2107 {
2108         visorbus_unregister_visor_driver(&visornic_driver);
2109
2110         debugfs_remove_recursive(visornic_debugfs_dir);
2111 }
2112
2113 module_init(visornic_init);
2114 module_exit(visornic_cleanup);
2115
2116 MODULE_AUTHOR("Unisys");
2117 MODULE_LICENSE("GPL");
2118 MODULE_DESCRIPTION("sPAR nic driver for sparlinux: ver 1.0.0.0");
2119 MODULE_VERSION("1.0.0.0");