1 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for more
15 /* This driver lives in a spar partition, and registers to ethernet io
16 * channels from the visorbus driver. It creates netdev devices and
17 * forwards transmit to the IO channel and accepts rcvs from the IO
18 * Partition via the IO channel.
21 #include <linux/debugfs.h>
22 #include <linux/etherdevice.h>
23 #include <linux/netdevice.h>
24 #include <linux/kthread.h>
25 #include <linux/skbuff.h>
26 #include <linux/rtnetlink.h>
29 #include "iochannel.h"
31 #define VISORNIC_INFINITE_RSP_WAIT 0
32 #define VISORNICSOPENMAX 32
33 #define MAXDEVICES 16384
35 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
38 #define MAX_BUF 163840
39 #define NAPI_WEIGHT 64
41 static int visornic_probe(struct visor_device *dev);
42 static void visornic_remove(struct visor_device *dev);
43 static int visornic_pause(struct visor_device *dev,
44 visorbus_state_complete_func complete_func);
45 static int visornic_resume(struct visor_device *dev,
46 visorbus_state_complete_func complete_func);
48 /* DEBUGFS declarations */
49 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
50 size_t len, loff_t *offset);
51 static ssize_t enable_ints_write(struct file *file, const char __user *buf,
52 size_t len, loff_t *ppos);
53 static struct dentry *visornic_debugfs_dir;
54 static const struct file_operations debugfs_info_fops = {
55 .read = info_debugfs_read,
58 static const struct file_operations debugfs_enable_ints_fops = {
59 .write = enable_ints_write,
62 /* GUIDS for director channel type supported by this driver. */
63 static struct visor_channeltype_descriptor visornic_channel_types[] = {
64 /* Note that the only channel type we expect to be reported by the
65 * bus driver is the SPAR_VNIC channel.
67 { SPAR_VNIC_CHANNEL_PROTOCOL_UUID, "ultravnic" },
68 { NULL_UUID_LE, NULL }
70 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
72 * FIXME XXX: This next line of code must be fixed and removed before
73 * acceptance into the 'normal' part of the kernel. It is only here as a place
74 * holder to get module autoloading functionality working for visorbus. Code
75 * must be added to scripts/mod/file2alias.c, etc., to get this working
78 MODULE_ALIAS("visorbus:" SPAR_VNIC_CHANNEL_PROTOCOL_UUID_STR);
80 /* This is used to tell the visor bus driver which types of visor devices
81 * we support, and what functions to call when a visor device that we support
82 * is attached or removed.
84 static struct visor_driver visornic_driver = {
89 .channel_types = visornic_channel_types,
90 .probe = visornic_probe,
91 .remove = visornic_remove,
92 .pause = visornic_pause,
93 .resume = visornic_resume,
94 .channel_interrupt = NULL,
98 unsigned long got_rcv;
99 unsigned long got_enbdisack;
100 unsigned long got_xmit_done;
101 unsigned long xmit_fail;
102 unsigned long sent_enbdis;
103 unsigned long sent_promisc;
104 unsigned long sent_post;
105 unsigned long sent_post_failed;
106 unsigned long sent_xmit;
107 unsigned long reject_count;
108 unsigned long extra_rcvbufs_sent;
111 struct visornic_devdata {
112 /* 0 disabled 1 enabled to receive */
113 unsigned short enabled;
114 /* NET_RCV_ENABLE/DISABLE acked by IOPART */
115 unsigned short enab_dis_acked;
117 struct visor_device *dev;
118 struct net_device *netdev;
119 struct net_device_stats net_stats;
120 atomic_t interrupt_rcvd;
121 wait_queue_head_t rsp_queue;
122 struct sk_buff **rcvbuf;
123 /* incarnation_id lets IOPART know about re-birth */
125 /* flags as they were prior to set_multicast_list */
126 unsigned short old_flags;
127 atomic_t usage; /* count of users */
129 /* number of rcv buffers the vnic will post */
131 int num_rcv_bufs_could_not_alloc;
132 atomic_t num_rcvbuf_in_iovm;
133 unsigned long alloc_failed_in_if_needed_cnt;
134 unsigned long alloc_failed_in_repost_rtn_cnt;
136 /* absolute max number of outstanding xmits - should never hit this */
137 unsigned long max_outstanding_net_xmits;
138 /* high water mark for calling netif_stop_queue() */
139 unsigned long upper_threshold_net_xmits;
140 /* low water mark for calling netif_wake_queue() */
141 unsigned long lower_threshold_net_xmits;
142 /* xmitbufhead - head of the xmit buffer list sent to the IOPART end */
143 struct sk_buff_head xmitbufhead;
145 visorbus_state_complete_func server_down_complete_func;
146 struct work_struct timeout_reset;
147 /* cmdrsp_rcv is used for posting/unposting rcv buffers */
148 struct uiscmdrsp *cmdrsp_rcv;
149 /* xmit_cmdrsp - issues NET_XMIT - only one active xmit at a time */
150 struct uiscmdrsp *xmit_cmdrsp;
152 bool server_down; /* IOPART is down */
153 bool server_change_state; /* Processing SERVER_CHANGESTATE msg */
154 bool going_away; /* device is being torn down */
155 struct dentry *eth_debugfs_dir;
157 u64 interrupts_notme;
158 u64 interrupts_disabled;
160 spinlock_t priv_lock; /* spinlock to access devdata structures */
162 /* flow control counter */
163 u64 flow_control_upper_hits;
164 u64 flow_control_lower_hits;
167 unsigned long n_rcv0; /* # rcvs of 0 buffers */
168 unsigned long n_rcv1; /* # rcvs of 1 buffers */
169 unsigned long n_rcv2; /* # rcvs of 2 buffers */
170 unsigned long n_rcvx; /* # rcvs of >2 buffers */
171 unsigned long found_repost_rcvbuf_cnt; /* # repost_rcvbuf_cnt */
172 unsigned long repost_found_skb_cnt; /* # of found the skb */
173 unsigned long n_repost_deficit; /* # of lost rcv buffers */
174 unsigned long bad_rcv_buf; /* # of unknown rcv skb not freed */
175 unsigned long n_rcv_packets_not_accepted;/* # bogs rcv packets */
177 int queuefullmsg_logged;
178 struct chanstat chstat;
179 struct timer_list irq_poll_timer;
180 struct napi_struct napi;
181 struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
184 static int visornic_poll(struct napi_struct *napi, int budget);
185 static void poll_for_irq(unsigned long v);
188 * visor_copy_fragsinfo_from_skb - copy an skb's fragment info into a frags array
189 * @skb: skbuff that we are pulling the frags from
190 * @firstfraglen: length of first fragment in skb
191 * @frags_max: max len of frags array
192 * @frags: frags array filled in on output
194 * Copy the fragment list in the SKB to a phys_info
195 * array that the IOPART understands.
196 * Return value indicates number of entries filled in frags
197 * Negative values indicate an error.
200 visor_copy_fragsinfo_from_skb(struct sk_buff *skb, unsigned int firstfraglen,
201 unsigned int frags_max,
202 struct phys_info frags[])
204 unsigned int count = 0, frag, size, offset = 0, numfrags;
205 unsigned int total_count;
207 numfrags = skb_shinfo(skb)->nr_frags;
209 /* Compute the number of fragments this skb has, and if it's more than
210 * the frags array can hold, linearize the skb
212 total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
213 if (firstfraglen % PI_PAGE_SIZE)
216 if (total_count > frags_max) {
217 if (skb_linearize(skb))
219 numfrags = skb_shinfo(skb)->nr_frags;
223 while (firstfraglen) {
224 if (count == frags_max)
227 frags[count].pi_pfn =
228 page_to_pfn(virt_to_page(skb->data + offset));
229 frags[count].pi_off =
230 (unsigned long)(skb->data + offset) & PI_PAGE_MASK;
231 size = min_t(unsigned int, firstfraglen,
232 PI_PAGE_SIZE - frags[count].pi_off);
234 /* can take smallest of firstfraglen (what's left) OR
235 * bytes left in the page
237 frags[count].pi_len = size;
238 firstfraglen -= size;
243 if ((count + numfrags) > frags_max)
246 for (frag = 0; frag < numfrags; frag++) {
247 count = add_physinfo_entries(page_to_pfn(
248 skb_frag_page(&skb_shinfo(skb)->frags[frag])),
249 skb_shinfo(skb)->frags[frag].
251 skb_shinfo(skb)->frags[frag].
252 size, count, frags_max, frags);
253 /* add_physinfo_entries only returns
254 * zero if the frags array is out of room
255 * That should never happen because we
256 * fail above, if count+numfrags > frags_max.
262 if (skb_shinfo(skb)->frag_list) {
263 struct sk_buff *skbinlist;
266 for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
267 skbinlist = skbinlist->next) {
268 c = visor_copy_fragsinfo_from_skb(skbinlist,
281 static ssize_t enable_ints_write(struct file *file,
282 const char __user *buffer,
283 size_t count, loff_t *ppos)
285 /* Don't want to break ABI here by having a debugfs
286 * file that no longer exists or is writable, so
287 * let's just make this a vestigial function
293 * visornic_serverdown_complete - IOPART went down, pause device
294 * @devdata: device that is being managed by IOPART
296 * The IO partition has gone down and we need to do some cleanup
297 * for when it comes back. Treat the IO partition as the link
302 visornic_serverdown_complete(struct visornic_devdata *devdata)
304 struct net_device *netdev;
306 netdev = devdata->netdev;
308 /* Stop polling for interrupts */
309 del_timer_sync(&devdata->irq_poll_timer);
315 atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
316 devdata->chstat.sent_xmit = 0;
317 devdata->chstat.got_xmit_done = 0;
319 if (devdata->server_down_complete_func)
320 (*devdata->server_down_complete_func)(devdata->dev, 0);
322 devdata->server_down = true;
323 devdata->server_change_state = false;
324 devdata->server_down_complete_func = NULL;
328 * visornic_serverdown - Command has notified us that IOPART is down
329 * @devdata: device that is being managed by IOPART
331 * Schedule the work needed to handle the server down request. Make
332 * sure we haven't already handled the server change state event.
333 * Returns 0 if we scheduled the work, -EINVAL on error.
336 visornic_serverdown(struct visornic_devdata *devdata,
337 visorbus_state_complete_func complete_func)
342 spin_lock_irqsave(&devdata->priv_lock, flags);
343 if (devdata->server_change_state) {
344 dev_dbg(&devdata->dev->device, "%s changing state\n",
349 if (devdata->server_down) {
350 dev_dbg(&devdata->dev->device, "%s already down\n",
355 if (devdata->going_away) {
356 dev_dbg(&devdata->dev->device,
357 "%s aborting because device removal pending\n",
362 devdata->server_change_state = true;
363 devdata->server_down_complete_func = complete_func;
364 spin_unlock_irqrestore(&devdata->priv_lock, flags);
366 visornic_serverdown_complete(devdata);
370 spin_unlock_irqrestore(&devdata->priv_lock, flags);
375 * alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition.
376 * @netdev: network adapter the rcv bufs are attached to.
378 * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
379 * so that it can write rcv data into our memory space.
380 * Return pointer to sk_buff
382 static struct sk_buff *
383 alloc_rcv_buf(struct net_device *netdev)
387 /* NOTE: the first fragment in each rcv buffer is pointed to by
388 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
389 * in length, so the first frag is large enough to hold 1514.
391 skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
395 /* current value of mtu doesn't come into play here; large
396 * packets will just end up using multiple rcv buffers all of
399 skb->len = RCVPOST_BUF_SIZE;
400 /* alloc_skb already zeroes it out for clarification. */
406 * post_skb - post a skb to the IO Partition.
407 * @cmdrsp: cmdrsp packet to be sent to the IO Partition
408 * @devdata: visornic_devdata to post the skb to
409 * @skb: skb to give to the IO partition
411 * Send the skb to the IO Partition.
415 post_skb(struct uiscmdrsp *cmdrsp,
416 struct visornic_devdata *devdata, struct sk_buff *skb)
418 cmdrsp->net.buf = skb;
419 cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
420 cmdrsp->net.rcvpost.frag.pi_off =
421 (unsigned long)skb->data & PI_PAGE_MASK;
422 cmdrsp->net.rcvpost.frag.pi_len = skb->len;
423 cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
425 if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) <= PI_PAGE_SIZE) {
426 cmdrsp->net.type = NET_RCV_POST;
427 cmdrsp->cmdtype = CMD_NET_TYPE;
428 if (visorchannel_signalinsert(devdata->dev->visorchannel,
431 atomic_inc(&devdata->num_rcvbuf_in_iovm);
432 devdata->chstat.sent_post++;
434 devdata->chstat.sent_post_failed++;
440 * send_enbdis - send NET_RCV_ENBDIS to IO Partition
441 * @netdev: netdevice we are enabling/disabling, used as context
443 * @state: enable = 1/disable = 0
444 * @devdata: visornic device we are enabling/disabling
446 * Send the enable/disable message to the IO Partition.
450 send_enbdis(struct net_device *netdev, int state,
451 struct visornic_devdata *devdata)
453 devdata->cmdrsp_rcv->net.enbdis.enable = state;
454 devdata->cmdrsp_rcv->net.enbdis.context = netdev;
455 devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
456 devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
457 if (visorchannel_signalinsert(devdata->dev->visorchannel,
459 devdata->cmdrsp_rcv))
460 devdata->chstat.sent_enbdis++;
464 * visornic_disable_with_timeout - Disable network adapter
465 * @netdev: netdevice to disable
466 * @timeout: timeout to wait for disable
468 * Disable the network adapter and inform the IO Partition that we
469 * are disabled, reclaim memory from rcv bufs.
470 * Returns 0 on success, negative for failure of IO Partition
475 visornic_disable_with_timeout(struct net_device *netdev, const int timeout)
477 struct visornic_devdata *devdata = netdev_priv(netdev);
482 /* send a msg telling the other end we are stopping incoming pkts */
483 spin_lock_irqsave(&devdata->priv_lock, flags);
484 devdata->enabled = 0;
485 devdata->enab_dis_acked = 0; /* must wait for ack */
486 spin_unlock_irqrestore(&devdata->priv_lock, flags);
488 /* send disable and wait for ack -- don't hold lock when sending
489 * disable because if the queue is full, insert might sleep.
491 send_enbdis(netdev, 0, devdata);
493 /* wait for ack to arrive before we try to free rcv buffers
494 * NOTE: the other end automatically unposts the rcv buffers when
495 * it gets a disable.
497 spin_lock_irqsave(&devdata->priv_lock, flags);
498 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
500 if (devdata->enab_dis_acked)
502 if (devdata->server_down || devdata->server_change_state) {
503 spin_unlock_irqrestore(&devdata->priv_lock, flags);
504 dev_dbg(&netdev->dev, "%s server went away\n",
508 set_current_state(TASK_INTERRUPTIBLE);
509 spin_unlock_irqrestore(&devdata->priv_lock, flags);
510 wait += schedule_timeout(msecs_to_jiffies(10));
511 spin_lock_irqsave(&devdata->priv_lock, flags);
514 /* Wait for usage to go to 1 (no other users) before freeing
517 if (atomic_read(&devdata->usage) > 1) {
519 set_current_state(TASK_INTERRUPTIBLE);
520 spin_unlock_irqrestore(&devdata->priv_lock, flags);
521 schedule_timeout(msecs_to_jiffies(10));
522 spin_lock_irqsave(&devdata->priv_lock, flags);
523 if (atomic_read(&devdata->usage))
527 /* we've set enabled to 0, so we can give up the lock. */
528 spin_unlock_irqrestore(&devdata->priv_lock, flags);
530 /* stop the transmit queue so nothing more can be transmitted */
531 netif_stop_queue(netdev);
533 napi_disable(&devdata->napi);
535 skb_queue_purge(&devdata->xmitbufhead);
537 /* Free rcv buffers - other end has automatically unposted them on
540 for (i = 0; i < devdata->num_rcv_bufs; i++) {
541 if (devdata->rcvbuf[i]) {
542 kfree_skb(devdata->rcvbuf[i]);
543 devdata->rcvbuf[i] = NULL;
551 * init_rcv_bufs -- initialize receive bufs and send them to the IO Part
552 * @netdev: struct netdevice
553 * @devdata: visornic_devdata
555 * Allocate rcv buffers and post them to the IO Partition.
556 * Return 0 for success, and negative for failure.
559 init_rcv_bufs(struct net_device *netdev, struct visornic_devdata *devdata)
563 /* allocate fixed number of receive buffers to post to uisnic
564 * post receive buffers after we've allocated a required amount
566 for (i = 0; i < devdata->num_rcv_bufs; i++) {
567 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
568 if (!devdata->rcvbuf[i])
569 break; /* if we failed to allocate one let us stop */
571 if (i == 0) /* couldn't even allocate one -- bail out */
575 /* Ensure we can alloc 2/3 of the requested number of buffers.
576 * 2/3 is an arbitrary choice; used also in ndis init.c
578 if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
579 /* free receive buffers we did alloc and then bail out */
580 for (i = 0; i < count; i++) {
581 kfree_skb(devdata->rcvbuf[i]);
582 devdata->rcvbuf[i] = NULL;
587 /* post receive buffers to receive incoming input - without holding
588 * lock - we've not enabled nor started the queue so there shouldn't
589 * be any rcv or xmit activity
591 for (i = 0; i < count; i++)
592 post_skb(devdata->cmdrsp_rcv, devdata, devdata->rcvbuf[i]);
598 * visornic_enable_with_timeout - send enable to IO Part
599 * @netdev: struct net_device
600 * @timeout: Time to wait for the ACK from the enable
602 * Sends enable to IOVM, inits, and posts receive buffers to IOVM
603 * timeout is defined in msecs (timeout of 0 specifies infinite wait)
604 * Return 0 for success, negative for failure.
607 visornic_enable_with_timeout(struct net_device *netdev, const int timeout)
610 struct visornic_devdata *devdata = netdev_priv(netdev);
614 /* NOTE: the other end automatically unposts the rcv buffers when it
617 i = init_rcv_bufs(netdev, devdata);
619 dev_err(&netdev->dev,
620 "%s failed to init rcv bufs (%d)\n", __func__, i);
624 spin_lock_irqsave(&devdata->priv_lock, flags);
625 devdata->enabled = 1;
626 devdata->enab_dis_acked = 0;
628 /* now we're ready, let's send an ENB to uisnic but until we get
629 * an ACK back from uisnic, we'll drop the packets
631 devdata->n_rcv_packets_not_accepted = 0;
632 spin_unlock_irqrestore(&devdata->priv_lock, flags);
634 /* send enable and wait for ack -- don't hold lock when sending enable
635 * because if the queue is full, insert might sleep.
637 napi_enable(&devdata->napi);
638 send_enbdis(netdev, 1, devdata);
640 spin_lock_irqsave(&devdata->priv_lock, flags);
641 while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
643 if (devdata->enab_dis_acked)
645 if (devdata->server_down || devdata->server_change_state) {
646 spin_unlock_irqrestore(&devdata->priv_lock, flags);
647 dev_dbg(&netdev->dev, "%s server went away\n",
651 set_current_state(TASK_INTERRUPTIBLE);
652 spin_unlock_irqrestore(&devdata->priv_lock, flags);
653 wait += schedule_timeout(msecs_to_jiffies(10));
654 spin_lock_irqsave(&devdata->priv_lock, flags);
657 spin_unlock_irqrestore(&devdata->priv_lock, flags);
659 if (!devdata->enab_dis_acked) {
660 dev_err(&netdev->dev, "%s missing ACK\n", __func__);
664 netif_start_queue(netdev);
670 * visornic_timeout_reset - handle xmit timeout resets
671 * @work: work item that scheduled the work
673 * Transmit timeouts are typically handled by resetting the
674 * device; for our virtual NIC we will send a Disable and Enable
675 * to the IOVM. If it doesn't respond we will trigger a serverdown.
678 visornic_timeout_reset(struct work_struct *work)
680 struct visornic_devdata *devdata;
681 struct net_device *netdev;
684 devdata = container_of(work, struct visornic_devdata, timeout_reset);
685 netdev = devdata->netdev;
688 if (!netif_running(netdev)) {
693 response = visornic_disable_with_timeout(netdev,
694 VISORNIC_INFINITE_RSP_WAIT);
696 goto call_serverdown;
698 response = visornic_enable_with_timeout(netdev,
699 VISORNIC_INFINITE_RSP_WAIT);
701 goto call_serverdown;
708 visornic_serverdown(devdata, NULL);
713 * visornic_open - Enable the visornic device and mark the queue started
714 * @netdev: netdevice to start
716 * Enable the device and start the transmit queue.
717 * Return 0 for success
720 visornic_open(struct net_device *netdev)
722 visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
728 * visornic_close - Disables the visornic device and stops the queues
729 * @netdev: netdevice to stop
731 * Disable the device and stop the transmit queue.
732 * Return 0 for success
735 visornic_close(struct net_device *netdev)
737 visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
743 * devdata_xmits_outstanding - compute outstanding xmits
744 * @devdata: visornic_devdata for device
746 * Return value is the number of outstanding xmits.
748 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
750 if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
751 return devdata->chstat.sent_xmit -
752 devdata->chstat.got_xmit_done;
753 return (ULONG_MAX - devdata->chstat.got_xmit_done
754 + devdata->chstat.sent_xmit + 1);
758 * vnic_hit_high_watermark
759 * @devdata: indicates visornic device we are checking
760 * @high_watermark: max num of unacked xmits we will tolerate,
761 * before we will start throttling
763 * Returns true iff the number of unacked xmits sent to
764 * the IO partition is >= high_watermark.
766 static inline bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
767 ulong high_watermark)
769 return (devdata_xmits_outstanding(devdata) >= high_watermark);
773 * vnic_hit_low_watermark
774 * @devdata: indicates visornic device we are checking
775 * @low_watermark: we will wait until the num of unacked xmits
776 * drops to this value or lower before we start
779 * Returns true iff the number of unacked xmits sent to
780 * the IO partition is <= low_watermark.
782 static inline bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
785 return (devdata_xmits_outstanding(devdata) <= low_watermark);
789 * visornic_xmit - send a packet to the IO Partition
790 * @skb: Packet to be sent
791 * @netdev: net device the packet is being sent from
793 * Convert the skb to a cmdrsp so the IO Partition can understand it.
794 * Send the XMIT command to the IO Partition for processing. This
795 * function is protected from concurrent calls by a spinlock xmit_lock
796 * in the net_device struct, but as soon as the function returns it
797 * can be called again.
798 * Returns NETDEV_TX_OK.
801 visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
803 struct visornic_devdata *devdata;
804 int len, firstfraglen, padlen;
805 struct uiscmdrsp *cmdrsp = NULL;
808 devdata = netdev_priv(netdev);
809 spin_lock_irqsave(&devdata->priv_lock, flags);
811 if (netif_queue_stopped(netdev) || devdata->server_down ||
812 devdata->server_change_state) {
813 spin_unlock_irqrestore(&devdata->priv_lock, flags);
815 dev_dbg(&netdev->dev,
816 "%s busy - queue stopped\n", __func__);
821 /* sk_buff struct is used to host network data throughout all the
822 * linux network subsystems
826 /* skb->len is the FULL length of data (including fragmentary portion)
827 * skb->data_len is the length of the fragment portion in frags
828 * skb->len - skb->data_len is size of the 1st fragment in skb->data
829 * calculate the length of the first fragment that skb->data is
832 firstfraglen = skb->len - skb->data_len;
833 if (firstfraglen < ETH_HEADER_SIZE) {
834 spin_unlock_irqrestore(&devdata->priv_lock, flags);
836 dev_err(&netdev->dev,
837 "%s busy - first frag too small (%d)\n",
838 __func__, firstfraglen);
843 if ((len < ETH_MIN_PACKET_SIZE) &&
844 ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
845 /* pad the packet out to minimum size */
846 padlen = ETH_MIN_PACKET_SIZE - len;
847 memset(&skb->data[len], 0, padlen);
851 firstfraglen += padlen;
854 cmdrsp = devdata->xmit_cmdrsp;
856 memset(cmdrsp, 0, SIZEOF_CMDRSP);
857 cmdrsp->net.type = NET_XMIT;
858 cmdrsp->cmdtype = CMD_NET_TYPE;
860 /* save the pointer to skb -- we'll need it for completion */
861 cmdrsp->net.buf = skb;
863 if (vnic_hit_high_watermark(devdata,
864 devdata->max_outstanding_net_xmits)) {
865 /* extra NET_XMITs queued over to IOVM - need to wait */
866 devdata->chstat.reject_count++;
867 if (!devdata->queuefullmsg_logged &&
868 ((devdata->chstat.reject_count & 0x3ff) == 1))
869 devdata->queuefullmsg_logged = 1;
870 netif_stop_queue(netdev);
871 spin_unlock_irqrestore(&devdata->priv_lock, flags);
873 dev_dbg(&netdev->dev,
874 "%s busy - waiting for iovm to catch up\n",
879 if (devdata->queuefullmsg_logged)
880 devdata->queuefullmsg_logged = 0;
882 if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
883 cmdrsp->net.xmt.lincsum.valid = 1;
884 cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
885 if (skb_transport_header(skb) > skb->data) {
886 cmdrsp->net.xmt.lincsum.hrawoff =
887 skb_transport_header(skb) - skb->data;
888 cmdrsp->net.xmt.lincsum.hrawoff = 1;
890 if (skb_network_header(skb) > skb->data) {
891 cmdrsp->net.xmt.lincsum.nhrawoff =
892 skb_network_header(skb) - skb->data;
893 cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
895 cmdrsp->net.xmt.lincsum.csum = skb->csum;
897 cmdrsp->net.xmt.lincsum.valid = 0;
900 /* save off the length of the entire data packet */
901 cmdrsp->net.xmt.len = len;
903 /* copy ethernet header from first frag into cmdrsp
904 * - everything else will be passed in frags & DMA'ed
906 memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HEADER_SIZE);
907 /* copy frags info - from skb->data we need to only provide access
910 cmdrsp->net.xmt.num_frags =
911 visor_copy_fragsinfo_from_skb(skb, firstfraglen,
913 cmdrsp->net.xmt.frags);
914 if (cmdrsp->net.xmt.num_frags < 0) {
915 spin_unlock_irqrestore(&devdata->priv_lock, flags);
917 dev_err(&netdev->dev,
918 "%s busy - copy frags failed\n", __func__);
923 if (!visorchannel_signalinsert(devdata->dev->visorchannel,
924 IOCHAN_TO_IOPART, cmdrsp)) {
925 netif_stop_queue(netdev);
926 spin_unlock_irqrestore(&devdata->priv_lock, flags);
928 dev_dbg(&netdev->dev,
929 "%s busy - signalinsert failed\n", __func__);
934 /* Track the skbs that have been sent to the IOVM for XMIT */
935 skb_queue_head(&devdata->xmitbufhead, skb);
937 /* update xmt stats */
938 devdata->net_stats.tx_packets++;
939 devdata->net_stats.tx_bytes += skb->len;
940 devdata->chstat.sent_xmit++;
942 /* check if we have hit the high watermark for netif_stop_queue() */
943 if (vnic_hit_high_watermark(devdata,
944 devdata->upper_threshold_net_xmits)) {
945 /* extra NET_XMITs queued over to IOVM - need to wait */
946 /* stop queue - call netif_wake_queue() after lower threshold */
947 netif_stop_queue(netdev);
948 dev_dbg(&netdev->dev,
949 "%s busy - invoking iovm flow control\n",
951 devdata->flow_control_upper_hits++;
953 spin_unlock_irqrestore(&devdata->priv_lock, flags);
955 /* skb will be freed when we get back NET_XMIT_DONE */
960 * visornic_get_stats - returns net_stats of the visornic device
963 * Returns the net_device_stats for the device
965 static struct net_device_stats *
966 visornic_get_stats(struct net_device *netdev)
968 struct visornic_devdata *devdata = netdev_priv(netdev);
970 return &devdata->net_stats;
974 * visornic_change_mtu - changes mtu of device.
976 * @new_mtu: value of new mtu
978 * MTU cannot be changed by system, must be changed via
979 * CONTROLVM message. All vnics and pnics in a switch have
980 * to have the same MTU for everything to work.
981 * Currently not supported.
985 visornic_change_mtu(struct net_device *netdev, int new_mtu)
991 * visornic_set_multi - set visornic device flags
994 * Only flag we support currently is IFF_PROMISC
998 visornic_set_multi(struct net_device *netdev)
1000 struct uiscmdrsp *cmdrsp;
1001 struct visornic_devdata *devdata = netdev_priv(netdev);
1003 if (devdata->old_flags == netdev->flags)
1006 if ((netdev->flags & IFF_PROMISC) ==
1007 (devdata->old_flags & IFF_PROMISC))
1008 goto out_save_flags;
1010 cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1013 cmdrsp->cmdtype = CMD_NET_TYPE;
1014 cmdrsp->net.type = NET_RCV_PROMISC;
1015 cmdrsp->net.enbdis.context = netdev;
1016 cmdrsp->net.enbdis.enable =
1017 netdev->flags & IFF_PROMISC;
1018 visorchannel_signalinsert(devdata->dev->visorchannel,
1024 devdata->old_flags = netdev->flags;
1028 * visornic_xmit_timeout - request to timeout the xmit
1031 * Queue the work and return. Make sure we have not already
1032 * been informed the IO Partition is gone, if it is gone
1033 * we will already timeout the xmits.
1036 visornic_xmit_timeout(struct net_device *netdev)
1038 struct visornic_devdata *devdata = netdev_priv(netdev);
1039 unsigned long flags;
1041 spin_lock_irqsave(&devdata->priv_lock, flags);
1042 if (devdata->going_away) {
1043 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1044 dev_dbg(&devdata->dev->device,
1045 "%s aborting because device removal pending\n",
1050 /* Ensure that a ServerDown message hasn't been received */
1051 if (!devdata->enabled ||
1052 (devdata->server_down && !devdata->server_change_state)) {
1053 dev_dbg(&netdev->dev, "%s no processing\n",
1055 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1058 schedule_work(&devdata->timeout_reset);
1059 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1063 * repost_return - repost rcv bufs that have come back
1064 * @cmdrsp: io channel command struct to post
1065 * @devdata: visornic devdata for the device
1067 * @netdev: netdevice
1069 * Repost rcv buffers that have been returned to us when
1070 * we are finished with them.
1071 * Returns 0 for success, -1 for error.
1074 repost_return(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1075 struct sk_buff *skb, struct net_device *netdev)
1077 struct net_pkt_rcv copy;
1078 int i = 0, cc, numreposted;
1082 copy = cmdrsp->net.rcv;
1083 switch (copy.numrcvbufs) {
1097 for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1098 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1099 if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1102 if ((skb) && devdata->rcvbuf[i] == skb) {
1103 devdata->found_repost_rcvbuf_cnt++;
1105 devdata->repost_found_skb_cnt++;
1107 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1108 if (!devdata->rcvbuf[i]) {
1109 devdata->num_rcv_bufs_could_not_alloc++;
1110 devdata->alloc_failed_in_repost_rtn_cnt++;
1114 post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1119 if (numreposted != copy.numrcvbufs) {
1120 devdata->n_repost_deficit++;
1128 devdata->bad_rcv_buf++;
1135 * visornic_rx - Handle receive packets coming back from IO Part
1136 * @cmdrsp: Receive packet returned from IO Part
1138 * Got a receive packet back from the IO Part, handle it and send
1140 * Returns 1 iff an skb was received, otherwise 0
1143 visornic_rx(struct uiscmdrsp *cmdrsp)
1145 struct visornic_devdata *devdata;
1146 struct sk_buff *skb, *prev, *curr;
1147 struct net_device *netdev;
1148 int cc, currsize, off;
1150 unsigned long flags;
1152 /* Post new rcv buf to the other end using the cmdrsp we have at hand.
1153 * Post it without holding lock - but we'll use the signal lock to
1154 * synchronize the queue insert. The cmdrsp that contains the net.rcv
1155 * is the one we are using to repost, so copy the info we need from it.
1157 skb = cmdrsp->net.buf;
1160 devdata = netdev_priv(netdev);
1162 spin_lock_irqsave(&devdata->priv_lock, flags);
1163 atomic_dec(&devdata->num_rcvbuf_in_iovm);
1165 /* set length to how much was ACTUALLY received -
1166 * NOTE: rcv_done_len includes actual length of data rcvd
1169 skb->len = cmdrsp->net.rcv.rcv_done_len;
1171 /* update rcv stats - call it with priv_lock held */
1172 devdata->net_stats.rx_packets++;
1173 devdata->net_stats.rx_bytes += skb->len;
1175 /* test enabled while holding lock */
1176 if (!(devdata->enabled && devdata->enab_dis_acked)) {
1177 /* don't process it unless we're in enable mode and until
1178 * we've gotten an ACK saying the other end got our RCV enable
1180 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1181 repost_return(cmdrsp, devdata, skb, netdev);
1185 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1187 /* when skb was allocated, skb->dev, skb->data, skb->len and
1188 * skb->data_len were set up, AND data has already been put into the
1189 * skb (both first frag and in frags pages)
1190 * NOTE: firstfragslen is the amount of data in skb->data and that
1191 * which is not in nr_frags or frag_list. This is now simply
1192 * RCVPOST_BUF_SIZE. bump tail to show how much data is in
1193 * firstfrag & set data_len to show rest see if we have to chain
1196 if (skb->len > RCVPOST_BUF_SIZE) { /* do PRECAUTIONARY check */
1197 if (cmdrsp->net.rcv.numrcvbufs < 2) {
1198 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1199 dev_err(&devdata->netdev->dev,
1200 "repost_return failed");
1203 /* length rcvd is greater than firstfrag in this skb rcv buf */
1204 skb->tail += RCVPOST_BUF_SIZE; /* amount in skb->data */
1205 skb->data_len = skb->len - RCVPOST_BUF_SIZE; /* amount that
1210 /* data fits in this skb - no chaining - do
1211 * PRECAUTIONARY check
1213 if (cmdrsp->net.rcv.numrcvbufs != 1) { /* should be 1 */
1214 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1215 dev_err(&devdata->netdev->dev,
1216 "repost_return failed");
1219 skb->tail += skb->len;
1220 skb->data_len = 0; /* nothing rcvd in frag_list */
1222 off = skb_tail_pointer(skb) - skb->data;
1224 /* amount we bumped tail by in the head skb
1225 * it is used to calculate the size of each chained skb below
1226 * it is also used to index into bufline to continue the copy
1227 * (for chansocktwopc)
1228 * if necessary chain the rcv skbs together.
1229 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1230 * chain the rest to that one.
1231 * - do PRECAUTIONARY check
1233 if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1234 if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1235 dev_err(&devdata->netdev->dev, "repost_return failed");
1239 if (cmdrsp->net.rcv.numrcvbufs > 1) {
1240 /* chain the various rcv buffers into the skb's frag_list. */
1241 /* Note: off was initialized above */
1242 for (cc = 1, prev = NULL;
1243 cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1244 curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1246 if (!prev) /* start of list- set head */
1247 skb_shinfo(skb)->frag_list = curr;
1252 /* should we set skb->len and skb->data_len for each
1253 * buffer being chained??? can't hurt!
1255 currsize = min(skb->len - off,
1256 (unsigned int)RCVPOST_BUF_SIZE);
1257 curr->len = currsize;
1258 curr->tail += currsize;
1262 /* assert skb->len == off */
1263 if (skb->len != off) {
1264 netdev_err(devdata->netdev,
1265 "something wrong; skb->len:%d != off:%d\n",
1270 /* set up packet's protocl type using ethernet header - this
1271 * sets up skb->pkt_type & it also PULLS out the eth header
1273 skb->protocol = eth_type_trans(skb, netdev);
1278 skb->ip_summed = CHECKSUM_NONE;
1281 if (netdev->flags & IFF_PROMISC)
1282 break; /* accept all packets */
1283 if (skb->pkt_type == PACKET_BROADCAST) {
1284 if (netdev->flags & IFF_BROADCAST)
1285 break; /* accept all broadcast packets */
1286 } else if (skb->pkt_type == PACKET_MULTICAST) {
1287 if ((netdev->flags & IFF_MULTICAST) &&
1288 (netdev_mc_count(netdev))) {
1289 struct netdev_hw_addr *ha;
1292 /* only accept multicast packets that we can
1293 * find in our multicast address list
1295 netdev_for_each_mc_addr(ha, netdev) {
1296 if (ether_addr_equal(eth->h_dest,
1302 /* accept pkt, dest matches a multicast addr */
1306 /* accept packet, h_dest must match vnic mac address */
1307 } else if (skb->pkt_type == PACKET_HOST) {
1309 } else if (skb->pkt_type == PACKET_OTHERHOST) {
1310 /* something is not right */
1311 dev_err(&devdata->netdev->dev,
1312 "**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1313 netdev->name, eth->h_dest, netdev->dev_addr);
1315 /* drop packet - don't forward it up to OS */
1316 devdata->n_rcv_packets_not_accepted++;
1317 repost_return(cmdrsp, devdata, skb, netdev);
1321 netif_receive_skb(skb);
1322 /* netif_rx returns various values, but "in practice most drivers
1323 * ignore the return value
1328 * whether the packet got dropped or handled, the skb is freed by
1329 * kernel code, so we shouldn't free it. but we should repost a
1332 repost_return(cmdrsp, devdata, skb, netdev);
1337 * devdata_initialize - Initialize devdata structure
1338 * @devdata: visornic_devdata structure to initialize
1339 * #dev: visorbus_deviced it belongs to
1341 * Setup initial values for the visornic based on channel and default
1343 * Returns a pointer to the devdata structure
1345 static struct visornic_devdata *
1346 devdata_initialize(struct visornic_devdata *devdata, struct visor_device *dev)
1349 devdata->incarnation_id = get_jiffies_64();
1354 * devdata_release - Frees up references in devdata
1355 * @devdata: struct to clean up
1357 * Frees up references in devdata.
1360 static void devdata_release(struct visornic_devdata *devdata)
1362 kfree(devdata->rcvbuf);
1363 kfree(devdata->cmdrsp_rcv);
1364 kfree(devdata->xmit_cmdrsp);
1367 static const struct net_device_ops visornic_dev_ops = {
1368 .ndo_open = visornic_open,
1369 .ndo_stop = visornic_close,
1370 .ndo_start_xmit = visornic_xmit,
1371 .ndo_get_stats = visornic_get_stats,
1372 .ndo_change_mtu = visornic_change_mtu,
1373 .ndo_tx_timeout = visornic_xmit_timeout,
1374 .ndo_set_rx_mode = visornic_set_multi,
1378 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1379 size_t len, loff_t *offset)
1381 ssize_t bytes_read = 0;
1383 struct visornic_devdata *devdata;
1384 struct net_device *dev;
1389 vbuf = kzalloc(len, GFP_KERNEL);
1393 /* for each vnic channel dump out channel specific data */
1395 for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1396 /* Only consider netdevs that are visornic, and are open */
1397 if ((dev->netdev_ops != &visornic_dev_ops) ||
1398 (!netif_queue_stopped(dev)))
1401 devdata = netdev_priv(dev);
1402 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1403 "netdev = %s (0x%p), MAC Addr %pM\n",
1407 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1408 "VisorNic Dev Info = 0x%p\n", devdata);
1409 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1410 " num_rcv_bufs = %d\n",
1411 devdata->num_rcv_bufs);
1412 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1413 " max_oustanding_next_xmits = %lu\n",
1414 devdata->max_outstanding_net_xmits);
1415 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1416 " upper_threshold_net_xmits = %lu\n",
1417 devdata->upper_threshold_net_xmits);
1418 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1419 " lower_threshold_net_xmits = %lu\n",
1420 devdata->lower_threshold_net_xmits);
1421 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1422 " queuefullmsg_logged = %d\n",
1423 devdata->queuefullmsg_logged);
1424 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1425 " chstat.got_rcv = %lu\n",
1426 devdata->chstat.got_rcv);
1427 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1428 " chstat.got_enbdisack = %lu\n",
1429 devdata->chstat.got_enbdisack);
1430 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1431 " chstat.got_xmit_done = %lu\n",
1432 devdata->chstat.got_xmit_done);
1433 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1434 " chstat.xmit_fail = %lu\n",
1435 devdata->chstat.xmit_fail);
1436 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1437 " chstat.sent_enbdis = %lu\n",
1438 devdata->chstat.sent_enbdis);
1439 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1440 " chstat.sent_promisc = %lu\n",
1441 devdata->chstat.sent_promisc);
1442 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1443 " chstat.sent_post = %lu\n",
1444 devdata->chstat.sent_post);
1445 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1446 " chstat.sent_post_failed = %lu\n",
1447 devdata->chstat.sent_post_failed);
1448 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1449 " chstat.sent_xmit = %lu\n",
1450 devdata->chstat.sent_xmit);
1451 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1452 " chstat.reject_count = %lu\n",
1453 devdata->chstat.reject_count);
1454 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1455 " chstat.extra_rcvbufs_sent = %lu\n",
1456 devdata->chstat.extra_rcvbufs_sent);
1457 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1458 " n_rcv0 = %lu\n", devdata->n_rcv0);
1459 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1460 " n_rcv1 = %lu\n", devdata->n_rcv1);
1461 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1462 " n_rcv2 = %lu\n", devdata->n_rcv2);
1463 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1464 " n_rcvx = %lu\n", devdata->n_rcvx);
1465 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466 " num_rcvbuf_in_iovm = %d\n",
1467 atomic_read(&devdata->num_rcvbuf_in_iovm));
1468 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469 " alloc_failed_in_if_needed_cnt = %lu\n",
1470 devdata->alloc_failed_in_if_needed_cnt);
1471 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472 " alloc_failed_in_repost_rtn_cnt = %lu\n",
1473 devdata->alloc_failed_in_repost_rtn_cnt);
1474 /* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475 * " inner_loop_limit_reached_cnt = %lu\n",
1476 * devdata->inner_loop_limit_reached_cnt);
1478 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1479 " found_repost_rcvbuf_cnt = %lu\n",
1480 devdata->found_repost_rcvbuf_cnt);
1481 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1482 " repost_found_skb_cnt = %lu\n",
1483 devdata->repost_found_skb_cnt);
1484 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1485 " n_repost_deficit = %lu\n",
1486 devdata->n_repost_deficit);
1487 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1488 " bad_rcv_buf = %lu\n",
1489 devdata->bad_rcv_buf);
1490 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1491 " n_rcv_packets_not_accepted = %lu\n",
1492 devdata->n_rcv_packets_not_accepted);
1493 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1494 " interrupts_rcvd = %llu\n",
1495 devdata->interrupts_rcvd);
1496 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1497 " interrupts_notme = %llu\n",
1498 devdata->interrupts_notme);
1499 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1500 " interrupts_disabled = %llu\n",
1501 devdata->interrupts_disabled);
1502 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1503 " busy_cnt = %llu\n",
1505 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1506 " flow_control_upper_hits = %llu\n",
1507 devdata->flow_control_upper_hits);
1508 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509 " flow_control_lower_hits = %llu\n",
1510 devdata->flow_control_lower_hits);
1511 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1512 " netif_queue = %s\n",
1513 netif_queue_stopped(devdata->netdev) ?
1514 "stopped" : "running");
1515 str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516 " xmits_outstanding = %lu\n",
1517 devdata_xmits_outstanding(devdata));
1520 bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1526 * send_rcv_posts_if_needed
1527 * @devdata: visornic device
1529 * Send receive buffers to the IO Partition.
1533 send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1536 struct net_device *netdev;
1537 struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1538 int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1540 /* don't do this until vnic is marked ready */
1541 if (!(devdata->enabled && devdata->enab_dis_acked))
1544 netdev = devdata->netdev;
1545 rcv_bufs_allocated = 0;
1546 /* this code is trying to prevent getting stuck here forever,
1547 * but still retry it if you cant allocate them all this time.
1549 cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1550 while (cur_num_rcv_bufs_to_alloc > 0) {
1551 cur_num_rcv_bufs_to_alloc--;
1552 for (i = 0; i < devdata->num_rcv_bufs; i++) {
1553 if (devdata->rcvbuf[i])
1555 devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1556 if (!devdata->rcvbuf[i]) {
1557 devdata->alloc_failed_in_if_needed_cnt++;
1560 rcv_bufs_allocated++;
1561 post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1562 devdata->chstat.extra_rcvbufs_sent++;
1565 devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1569 * drain_resp_queue - drains and ignores all messages from the resp queue
1570 * @cmdrsp: io channel command response message
1571 * @devdata: visornic device to drain
1574 drain_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata)
1576 while (visorchannel_signalremove(devdata->dev->visorchannel,
1583 * service_resp_queue - drains the response queue
1584 * @cmdrsp: io channel command response message
1585 * @devdata: visornic device to drain
1587 * Drain the respones queue of any responses from the IO partition.
1588 * Process the responses as we get them.
1589 * Returns when response queue is empty or when the threadd stops.
1592 service_resp_queue(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
1593 int *rx_work_done, int budget)
1595 unsigned long flags;
1596 struct net_device *netdev;
1598 while (*rx_work_done < budget) {
1599 /* TODO: CLIENT ACQUIRE -- Don't really need this at the
1602 if (!visorchannel_signalremove(devdata->dev->visorchannel,
1605 break; /* queue empty */
1607 switch (cmdrsp->net.type) {
1609 devdata->chstat.got_rcv++;
1610 /* process incoming packet */
1611 *rx_work_done += visornic_rx(cmdrsp);
1614 spin_lock_irqsave(&devdata->priv_lock, flags);
1615 devdata->chstat.got_xmit_done++;
1616 if (cmdrsp->net.xmtdone.xmt_done_result)
1617 devdata->chstat.xmit_fail++;
1618 /* only call queue wake if we stopped it */
1619 netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1620 /* ASSERT netdev == vnicinfo->netdev; */
1621 if ((netdev == devdata->netdev) &&
1622 netif_queue_stopped(netdev)) {
1623 /* check if we have crossed the lower watermark
1624 * for netif_wake_queue()
1626 if (vnic_hit_low_watermark
1628 devdata->lower_threshold_net_xmits)) {
1629 /* enough NET_XMITs completed
1630 * so can restart netif queue
1632 netif_wake_queue(netdev);
1633 devdata->flow_control_lower_hits++;
1636 skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1637 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1638 kfree_skb(cmdrsp->net.buf);
1640 case NET_RCV_ENBDIS_ACK:
1641 devdata->chstat.got_enbdisack++;
1642 netdev = (struct net_device *)
1643 cmdrsp->net.enbdis.context;
1644 spin_lock_irqsave(&devdata->priv_lock, flags);
1645 devdata->enab_dis_acked = 1;
1646 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1648 if (devdata->server_down &&
1649 devdata->server_change_state) {
1650 /* Inform Linux that the link is up */
1651 devdata->server_down = false;
1652 devdata->server_change_state = false;
1653 netif_wake_queue(netdev);
1654 netif_carrier_on(netdev);
1657 case NET_CONNECT_STATUS:
1658 netdev = devdata->netdev;
1659 if (cmdrsp->net.enbdis.enable == 1) {
1660 spin_lock_irqsave(&devdata->priv_lock, flags);
1661 devdata->enabled = cmdrsp->net.enbdis.enable;
1662 spin_unlock_irqrestore(&devdata->priv_lock,
1664 netif_wake_queue(netdev);
1665 netif_carrier_on(netdev);
1667 netif_stop_queue(netdev);
1668 netif_carrier_off(netdev);
1669 spin_lock_irqsave(&devdata->priv_lock, flags);
1670 devdata->enabled = cmdrsp->net.enbdis.enable;
1671 spin_unlock_irqrestore(&devdata->priv_lock,
1678 /* cmdrsp is now available for reuse */
1682 static int visornic_poll(struct napi_struct *napi, int budget)
1684 struct visornic_devdata *devdata = container_of(napi,
1685 struct visornic_devdata,
1689 send_rcv_posts_if_needed(devdata);
1690 service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1692 /* If there aren't any more packets to receive stop the poll */
1693 if (rx_count < budget)
1694 napi_complete(napi);
1700 * poll_for_irq - Checks the status of the response queue.
1701 * @v: void pointer to the visronic devdata
1703 * Main function of the vnic_incoming thread. Peridocially check the
1704 * response queue and drain it if needed.
1705 * Returns when thread has stopped.
1708 poll_for_irq(unsigned long v)
1710 struct visornic_devdata *devdata = (struct visornic_devdata *)v;
1712 if (!visorchannel_signalempty(
1713 devdata->dev->visorchannel,
1714 IOCHAN_FROM_IOPART))
1715 napi_schedule(&devdata->napi);
1717 atomic_set(&devdata->interrupt_rcvd, 0);
1719 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1723 * visornic_probe - probe function for visornic devices
1724 * @dev: The visor device discovered
1726 * Called when visorbus discovers a visornic device on its
1727 * bus. It creates a new visornic ethernet adapter.
1728 * Returns 0 or negative for error.
1730 static int visornic_probe(struct visor_device *dev)
1732 struct visornic_devdata *devdata = NULL;
1733 struct net_device *netdev = NULL;
1735 int channel_offset = 0;
1738 netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1740 dev_err(&dev->device,
1741 "%s alloc_etherdev failed\n", __func__);
1745 netdev->netdev_ops = &visornic_dev_ops;
1746 netdev->watchdog_timeo = 5 * HZ;
1747 SET_NETDEV_DEV(netdev, &dev->device);
1749 /* Get MAC adddress from channel and read it into the device. */
1750 netdev->addr_len = ETH_ALEN;
1751 channel_offset = offsetof(struct spar_io_channel_protocol,
1753 err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1756 dev_err(&dev->device,
1757 "%s failed to get mac addr from chan (%d)\n",
1759 goto cleanup_netdev;
1762 devdata = devdata_initialize(netdev_priv(netdev), dev);
1764 dev_err(&dev->device,
1765 "%s devdata_initialize failed\n", __func__);
1767 goto cleanup_netdev;
1769 /* don't trust messages laying around in the channel */
1770 drain_resp_queue(devdata->cmdrsp, devdata);
1772 devdata->netdev = netdev;
1773 dev_set_drvdata(&dev->device, devdata);
1774 init_waitqueue_head(&devdata->rsp_queue);
1775 spin_lock_init(&devdata->priv_lock);
1776 devdata->enabled = 0; /* not yet */
1777 atomic_set(&devdata->usage, 1);
1779 /* Setup rcv bufs */
1780 channel_offset = offsetof(struct spar_io_channel_protocol,
1782 err = visorbus_read_channel(dev, channel_offset,
1783 &devdata->num_rcv_bufs, 4);
1785 dev_err(&dev->device,
1786 "%s failed to get #rcv bufs from chan (%d)\n",
1788 goto cleanup_netdev;
1791 devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1792 sizeof(struct sk_buff *), GFP_KERNEL);
1793 if (!devdata->rcvbuf) {
1795 goto cleanup_netdev;
1798 /* set the net_xmit outstanding threshold */
1799 /* always leave two slots open but you should have 3 at a minimum */
1800 /* note that max_outstanding_net_xmits must be > 0 */
1801 devdata->max_outstanding_net_xmits =
1802 max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1803 devdata->upper_threshold_net_xmits =
1804 max_t(unsigned long,
1805 2, (devdata->max_outstanding_net_xmits - 1));
1806 devdata->lower_threshold_net_xmits =
1807 max_t(unsigned long,
1808 1, (devdata->max_outstanding_net_xmits / 2));
1810 skb_queue_head_init(&devdata->xmitbufhead);
1812 /* create a cmdrsp we can use to post and unpost rcv buffers */
1813 devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1814 if (!devdata->cmdrsp_rcv) {
1816 goto cleanup_rcvbuf;
1818 devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1819 if (!devdata->xmit_cmdrsp) {
1821 goto cleanup_cmdrsp_rcv;
1823 INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1824 devdata->server_down = false;
1825 devdata->server_change_state = false;
1827 /*set the default mtu */
1828 channel_offset = offsetof(struct spar_io_channel_protocol,
1830 err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1832 dev_err(&dev->device,
1833 "%s failed to get mtu from chan (%d)\n",
1835 goto cleanup_xmit_cmdrsp;
1838 /* TODO: Setup Interrupt information */
1839 /* Let's start our threads to get responses */
1840 netif_napi_add(netdev, &devdata->napi, visornic_poll, 64);
1842 setup_timer(&devdata->irq_poll_timer, poll_for_irq,
1843 (unsigned long)devdata);
1844 /* Note: This time has to start running before the while
1845 * loop below because the napi routine is responsible for
1846 * setting enab_dis_acked
1848 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1850 channel_offset = offsetof(struct spar_io_channel_protocol,
1851 channel_header.features);
1852 err = visorbus_read_channel(dev, channel_offset, &features, 8);
1854 dev_err(&dev->device,
1855 "%s failed to get features from chan (%d)\n",
1857 goto cleanup_napi_add;
1860 features |= ULTRA_IO_CHANNEL_IS_POLLING;
1861 features |= ULTRA_IO_DRIVER_SUPPORTS_ENHANCED_RCVBUF_CHECKING;
1862 err = visorbus_write_channel(dev, channel_offset, &features, 8);
1864 dev_err(&dev->device,
1865 "%s failed to set features in chan (%d)\n",
1867 goto cleanup_napi_add;
1870 /* Let's start our threads to get responses */
1871 netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1873 /* Note: Interupts have to be enable before the while
1874 * loop below because the napi routine is responsible for
1875 * setting enab_dis_acked
1877 visorbus_enable_channel_interrupts(dev);
1879 err = register_netdev(netdev);
1881 dev_err(&dev->device,
1882 "%s register_netdev failed (%d)\n", __func__, err);
1883 goto cleanup_napi_add;
1886 /* create debgug/sysfs directories */
1887 devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1888 visornic_debugfs_dir);
1889 if (!devdata->eth_debugfs_dir) {
1890 dev_err(&dev->device,
1891 "%s debugfs_create_dir %s failed\n",
1892 __func__, netdev->name);
1894 goto cleanup_register_netdev;
1897 dev_info(&dev->device, "%s success netdev=%s\n",
1898 __func__, netdev->name);
1901 cleanup_register_netdev:
1902 unregister_netdev(netdev);
1905 del_timer_sync(&devdata->irq_poll_timer);
1906 netif_napi_del(&devdata->napi);
1908 cleanup_xmit_cmdrsp:
1909 kfree(devdata->xmit_cmdrsp);
1912 kfree(devdata->cmdrsp_rcv);
1915 kfree(devdata->rcvbuf);
1918 free_netdev(netdev);
1923 * host_side_disappeared - IO part is gone.
1924 * @devdata: device object
1926 * IO partition servicing this device is gone, do cleanup
1929 static void host_side_disappeared(struct visornic_devdata *devdata)
1931 unsigned long flags;
1933 spin_lock_irqsave(&devdata->priv_lock, flags);
1934 devdata->dev = NULL; /* indicate device destroyed */
1935 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1939 * visornic_remove - Called when visornic dev goes away
1940 * @dev: visornic device that is being removed
1942 * Called when DEVICE_DESTROY gets called to remove device.
1945 static void visornic_remove(struct visor_device *dev)
1947 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1948 struct net_device *netdev;
1949 unsigned long flags;
1952 dev_err(&dev->device, "%s no devdata\n", __func__);
1955 spin_lock_irqsave(&devdata->priv_lock, flags);
1956 if (devdata->going_away) {
1957 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1958 dev_err(&dev->device, "%s already being removed\n", __func__);
1961 devdata->going_away = true;
1962 spin_unlock_irqrestore(&devdata->priv_lock, flags);
1963 netdev = devdata->netdev;
1965 dev_err(&dev->device, "%s not net device\n", __func__);
1969 /* going_away prevents new items being added to the workqueues */
1970 cancel_work_sync(&devdata->timeout_reset);
1972 debugfs_remove_recursive(devdata->eth_debugfs_dir);
1974 unregister_netdev(netdev); /* this will call visornic_close() */
1976 del_timer_sync(&devdata->irq_poll_timer);
1977 netif_napi_del(&devdata->napi);
1979 dev_set_drvdata(&dev->device, NULL);
1980 host_side_disappeared(devdata);
1981 devdata_release(devdata);
1982 free_netdev(netdev);
1986 * visornic_pause - Called when IO Part disappears
1987 * @dev: visornic device that is being serviced
1988 * @complete_func: call when finished.
1990 * Called when the IO Partition has gone down. Need to free
1991 * up resources and wait for IO partition to come back. Mark
1992 * link as down and don't attempt any DMA. When we have freed
1993 * memory call the complete_func so that Command knows we are
1994 * done. If we don't call complete_func, IO part will never
1996 * Returns 0 for success.
1998 static int visornic_pause(struct visor_device *dev,
1999 visorbus_state_complete_func complete_func)
2001 struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2003 visornic_serverdown(devdata, complete_func);
2008 * visornic_resume - Called when IO part has recovered
2009 * @dev: visornic device that is being serviced
2010 * @compelte_func: call when finished
2012 * Called when the IO partition has recovered. Reestablish
2013 * connection to the IO part and set the link up. Okay to do
2015 * Returns 0 for success.
2017 static int visornic_resume(struct visor_device *dev,
2018 visorbus_state_complete_func complete_func)
2020 struct visornic_devdata *devdata;
2021 struct net_device *netdev;
2022 unsigned long flags;
2024 devdata = dev_get_drvdata(&dev->device);
2026 dev_err(&dev->device, "%s no devdata\n", __func__);
2030 netdev = devdata->netdev;
2032 spin_lock_irqsave(&devdata->priv_lock, flags);
2033 if (devdata->server_change_state) {
2034 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2035 dev_err(&dev->device, "%s server already changing state\n",
2039 if (!devdata->server_down) {
2040 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2041 dev_err(&dev->device, "%s server not down\n", __func__);
2042 complete_func(dev, 0);
2045 devdata->server_change_state = true;
2046 spin_unlock_irqrestore(&devdata->priv_lock, flags);
2048 /* Must transition channel to ATTACHED state BEFORE
2049 * we can start using the device again.
2050 * TODO: State transitions
2052 mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
2054 init_rcv_bufs(netdev, devdata);
2060 complete_func(dev, 0);
2065 * visornic_init - Init function
2067 * Init function for the visornic driver. Do initial driver setup
2068 * and wait for devices.
2069 * Returns 0 for success, negative for error.
2071 static int visornic_init(void)
2076 visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2077 if (!visornic_debugfs_dir)
2080 ret = debugfs_create_file("info", S_IRUSR, visornic_debugfs_dir, NULL,
2081 &debugfs_info_fops);
2083 goto cleanup_debugfs;
2084 ret = debugfs_create_file("enable_ints", S_IWUSR, visornic_debugfs_dir,
2085 NULL, &debugfs_enable_ints_fops);
2087 goto cleanup_debugfs;
2089 err = visorbus_register_visor_driver(&visornic_driver);
2091 goto cleanup_debugfs;
2096 debugfs_remove_recursive(visornic_debugfs_dir);
2102 * visornic_cleanup - driver exit routine
2104 * Unregister driver from the bus and free up memory.
2106 static void visornic_cleanup(void)
2108 visorbus_unregister_visor_driver(&visornic_driver);
2110 debugfs_remove_recursive(visornic_debugfs_dir);
2113 module_init(visornic_init);
2114 module_exit(visornic_cleanup);
2116 MODULE_AUTHOR("Unisys");
2117 MODULE_LICENSE("GPL");
2118 MODULE_DESCRIPTION("sPAR nic driver for sparlinux: ver 1.0.0.0");
2119 MODULE_VERSION("1.0.0.0");