time: Remove timekeeping_inject_sleeptime()
[cascardo/linux.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to set it up.
28    ----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
56 struct teql_master {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         unsigned long   tx_bytes;
62         unsigned long   tx_packets;
63         unsigned long   tx_errors;
64         unsigned long   tx_dropped;
65 };
66
67 struct teql_sched_data {
68         struct Qdisc *next;
69         struct teql_master *m;
70         struct sk_buff_head q;
71 };
72
/*
 * Successor of slave qdisc @q in the circular slave list. Expands to the
 * struct member itself, so it can also be assigned to.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Device flags the master copies from / intersects over its slaves. */
#define FMASK (IFF_POINTOPOINT | IFF_BROADCAST)
76
77 /* "teql*" qdisc routines */
78
79 static int
80 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
81 {
82         struct net_device *dev = qdisc_dev(sch);
83         struct teql_sched_data *q = qdisc_priv(sch);
84
85         if (q->q.qlen < dev->tx_queue_len) {
86                 __skb_queue_tail(&q->q, skb);
87                 return NET_XMIT_SUCCESS;
88         }
89
90         return qdisc_drop(skb, sch);
91 }
92
93 static struct sk_buff *
94 teql_dequeue(struct Qdisc *sch)
95 {
96         struct teql_sched_data *dat = qdisc_priv(sch);
97         struct netdev_queue *dat_queue;
98         struct sk_buff *skb;
99         struct Qdisc *q;
100
101         skb = __skb_dequeue(&dat->q);
102         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
103         q = rcu_dereference_bh(dat_queue->qdisc);
104
105         if (skb == NULL) {
106                 struct net_device *m = qdisc_dev(q);
107                 if (m) {
108                         dat->m->slaves = sch;
109                         netif_wake_queue(m);
110                 }
111         } else {
112                 qdisc_bstats_update(sch, skb);
113         }
114         sch->q.qlen = dat->q.qlen + q->q.qlen;
115         return skb;
116 }
117
118 static struct sk_buff *
119 teql_peek(struct Qdisc *sch)
120 {
121         /* teql is meant to be used as root qdisc */
122         return NULL;
123 }
124
/* Drop a reference on neighbour entry @n; a NULL @n is ignored. */
static inline void
teql_neigh_release(struct neighbour *n)
{
	if (!n)
		return;

	neigh_release(n);
}
131
132 static void
133 teql_reset(struct Qdisc *sch)
134 {
135         struct teql_sched_data *dat = qdisc_priv(sch);
136
137         skb_queue_purge(&dat->q);
138         sch->q.qlen = 0;
139 }
140
141 static void
142 teql_destroy(struct Qdisc *sch)
143 {
144         struct Qdisc *q, *prev;
145         struct teql_sched_data *dat = qdisc_priv(sch);
146         struct teql_master *master = dat->m;
147
148         prev = master->slaves;
149         if (prev) {
150                 do {
151                         q = NEXT_SLAVE(prev);
152                         if (q == sch) {
153                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
154                                 if (q == master->slaves) {
155                                         master->slaves = NEXT_SLAVE(q);
156                                         if (q == master->slaves) {
157                                                 struct netdev_queue *txq;
158                                                 spinlock_t *root_lock;
159
160                                                 txq = netdev_get_tx_queue(master->dev, 0);
161                                                 master->slaves = NULL;
162
163                                                 root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
164                                                 spin_lock_bh(root_lock);
165                                                 qdisc_reset(rtnl_dereference(txq->qdisc));
166                                                 spin_unlock_bh(root_lock);
167                                         }
168                                 }
169                                 skb_queue_purge(&dat->q);
170                                 break;
171                         }
172
173                 } while ((prev = q) != master->slaves);
174         }
175 }
176
177 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
178 {
179         struct net_device *dev = qdisc_dev(sch);
180         struct teql_master *m = (struct teql_master *)sch->ops;
181         struct teql_sched_data *q = qdisc_priv(sch);
182
183         if (dev->hard_header_len > m->dev->hard_header_len)
184                 return -EINVAL;
185
186         if (m->dev == dev)
187                 return -ELOOP;
188
189         q->m = m;
190
191         skb_queue_head_init(&q->q);
192
193         if (m->slaves) {
194                 if (m->dev->flags & IFF_UP) {
195                         if ((m->dev->flags & IFF_POINTOPOINT &&
196                              !(dev->flags & IFF_POINTOPOINT)) ||
197                             (m->dev->flags & IFF_BROADCAST &&
198                              !(dev->flags & IFF_BROADCAST)) ||
199                             (m->dev->flags & IFF_MULTICAST &&
200                              !(dev->flags & IFF_MULTICAST)) ||
201                             dev->mtu < m->dev->mtu)
202                                 return -EINVAL;
203                 } else {
204                         if (!(dev->flags&IFF_POINTOPOINT))
205                                 m->dev->flags &= ~IFF_POINTOPOINT;
206                         if (!(dev->flags&IFF_BROADCAST))
207                                 m->dev->flags &= ~IFF_BROADCAST;
208                         if (!(dev->flags&IFF_MULTICAST))
209                                 m->dev->flags &= ~IFF_MULTICAST;
210                         if (dev->mtu < m->dev->mtu)
211                                 m->dev->mtu = dev->mtu;
212                 }
213                 q->next = NEXT_SLAVE(m->slaves);
214                 NEXT_SLAVE(m->slaves) = sch;
215         } else {
216                 q->next = sch;
217                 m->slaves = sch;
218                 m->dev->mtu = dev->mtu;
219                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
220         }
221         return 0;
222 }
223
224
225 static int
226 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
227                struct net_device *dev, struct netdev_queue *txq,
228                struct dst_entry *dst)
229 {
230         struct neighbour *n;
231         int err = 0;
232
233         n = dst_neigh_lookup_skb(dst, skb);
234         if (!n)
235                 return -ENOENT;
236
237         if (dst->dev != dev) {
238                 struct neighbour *mn;
239
240                 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
241                 neigh_release(n);
242                 if (IS_ERR(mn))
243                         return PTR_ERR(mn);
244                 n = mn;
245         }
246
247         if (neigh_event_send(n, skb_res) == 0) {
248                 int err;
249                 char haddr[MAX_ADDR_LEN];
250
251                 neigh_ha_snapshot(haddr, n, dev);
252                 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
253                                       NULL, skb->len);
254
255                 if (err < 0)
256                         err = -EINVAL;
257         } else {
258                 err = (skb_res == NULL) ? -EAGAIN : 1;
259         }
260         neigh_release(n);
261         return err;
262 }
263
264 static inline int teql_resolve(struct sk_buff *skb,
265                                struct sk_buff *skb_res,
266                                struct net_device *dev,
267                                struct netdev_queue *txq)
268 {
269         struct dst_entry *dst = skb_dst(skb);
270         int res;
271
272         if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
273                 return -ENODEV;
274
275         if (!dev->header_ops || !dst)
276                 return 0;
277
278         rcu_read_lock();
279         res = __teql_resolve(skb, skb_res, dev, txq, dst);
280         rcu_read_unlock();
281
282         return res;
283 }
284
285 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
286 {
287         struct teql_master *master = netdev_priv(dev);
288         struct Qdisc *start, *q;
289         int busy;
290         int nores;
291         int subq = skb_get_queue_mapping(skb);
292         struct sk_buff *skb_res = NULL;
293
294         start = master->slaves;
295
296 restart:
297         nores = 0;
298         busy = 0;
299
300         q = start;
301         if (!q)
302                 goto drop;
303
304         do {
305                 struct net_device *slave = qdisc_dev(q);
306                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
307
308                 if (slave_txq->qdisc_sleeping != q)
309                         continue;
310                 if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
311                     !netif_running(slave)) {
312                         busy = 1;
313                         continue;
314                 }
315
316                 switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
317                 case 0:
318                         if (__netif_tx_trylock(slave_txq)) {
319                                 unsigned int length = qdisc_pkt_len(skb);
320
321                                 if (!netif_xmit_frozen_or_stopped(slave_txq) &&
322                                     netdev_start_xmit(skb, slave, slave_txq, false) ==
323                                     NETDEV_TX_OK) {
324                                         __netif_tx_unlock(slave_txq);
325                                         master->slaves = NEXT_SLAVE(q);
326                                         netif_wake_queue(dev);
327                                         master->tx_packets++;
328                                         master->tx_bytes += length;
329                                         return NETDEV_TX_OK;
330                                 }
331                                 __netif_tx_unlock(slave_txq);
332                         }
333                         if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
334                                 busy = 1;
335                         break;
336                 case 1:
337                         master->slaves = NEXT_SLAVE(q);
338                         return NETDEV_TX_OK;
339                 default:
340                         nores = 1;
341                         break;
342                 }
343                 __skb_pull(skb, skb_network_offset(skb));
344         } while ((q = NEXT_SLAVE(q)) != start);
345
346         if (nores && skb_res == NULL) {
347                 skb_res = skb;
348                 goto restart;
349         }
350
351         if (busy) {
352                 netif_stop_queue(dev);
353                 return NETDEV_TX_BUSY;
354         }
355         master->tx_errors++;
356
357 drop:
358         master->tx_dropped++;
359         dev_kfree_skb(skb);
360         return NETDEV_TX_OK;
361 }
362
363 static int teql_master_open(struct net_device *dev)
364 {
365         struct Qdisc *q;
366         struct teql_master *m = netdev_priv(dev);
367         int mtu = 0xFFFE;
368         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
369
370         if (m->slaves == NULL)
371                 return -EUNATCH;
372
373         flags = FMASK;
374
375         q = m->slaves;
376         do {
377                 struct net_device *slave = qdisc_dev(q);
378
379                 if (slave == NULL)
380                         return -EUNATCH;
381
382                 if (slave->mtu < mtu)
383                         mtu = slave->mtu;
384                 if (slave->hard_header_len > LL_MAX_HEADER)
385                         return -EINVAL;
386
387                 /* If all the slaves are BROADCAST, master is BROADCAST
388                    If all the slaves are PtP, master is PtP
389                    Otherwise, master is NBMA.
390                  */
391                 if (!(slave->flags&IFF_POINTOPOINT))
392                         flags &= ~IFF_POINTOPOINT;
393                 if (!(slave->flags&IFF_BROADCAST))
394                         flags &= ~IFF_BROADCAST;
395                 if (!(slave->flags&IFF_MULTICAST))
396                         flags &= ~IFF_MULTICAST;
397         } while ((q = NEXT_SLAVE(q)) != m->slaves);
398
399         m->dev->mtu = mtu;
400         m->dev->flags = (m->dev->flags&~FMASK) | flags;
401         netif_start_queue(m->dev);
402         return 0;
403 }
404
/* ndo_stop of the master device: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
410
411 static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
412                                                      struct rtnl_link_stats64 *stats)
413 {
414         struct teql_master *m = netdev_priv(dev);
415
416         stats->tx_packets       = m->tx_packets;
417         stats->tx_bytes         = m->tx_bytes;
418         stats->tx_errors        = m->tx_errors;
419         stats->tx_dropped       = m->tx_dropped;
420         return stats;
421 }
422
423 static int teql_master_mtu(struct net_device *dev, int new_mtu)
424 {
425         struct teql_master *m = netdev_priv(dev);
426         struct Qdisc *q;
427
428         if (new_mtu < 68)
429                 return -EINVAL;
430
431         q = m->slaves;
432         if (q) {
433                 do {
434                         if (new_mtu > qdisc_dev(q)->mtu)
435                                 return -EINVAL;
436                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
437         }
438
439         dev->mtu = new_mtu;
440         return 0;
441 }
442
443 static const struct net_device_ops teql_netdev_ops = {
444         .ndo_open       = teql_master_open,
445         .ndo_stop       = teql_master_close,
446         .ndo_start_xmit = teql_master_xmit,
447         .ndo_get_stats64 = teql_master_stats64,
448         .ndo_change_mtu = teql_master_mtu,
449 };
450
451 static __init void teql_master_setup(struct net_device *dev)
452 {
453         struct teql_master *master = netdev_priv(dev);
454         struct Qdisc_ops *ops = &master->qops;
455
456         master->dev     = dev;
457         ops->priv_size  = sizeof(struct teql_sched_data);
458
459         ops->enqueue    =       teql_enqueue;
460         ops->dequeue    =       teql_dequeue;
461         ops->peek       =       teql_peek;
462         ops->init       =       teql_qdisc_init;
463         ops->reset      =       teql_reset;
464         ops->destroy    =       teql_destroy;
465         ops->owner      =       THIS_MODULE;
466
467         dev->netdev_ops =       &teql_netdev_ops;
468         dev->type               = ARPHRD_VOID;
469         dev->mtu                = 1500;
470         dev->tx_queue_len       = 100;
471         dev->flags              = IFF_NOARP;
472         dev->hard_header_len    = LL_MAX_HEADER;
473         netif_keep_dst(dev);
474 }
475
476 static LIST_HEAD(master_dev_list);
477 static int max_equalizers = 1;
478 module_param(max_equalizers, int, 0);
479 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
480
481 static int __init teql_init(void)
482 {
483         int i;
484         int err = -ENODEV;
485
486         for (i = 0; i < max_equalizers; i++) {
487                 struct net_device *dev;
488                 struct teql_master *master;
489
490                 dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
491                                    NET_NAME_UNKNOWN, teql_master_setup);
492                 if (!dev) {
493                         err = -ENOMEM;
494                         break;
495                 }
496
497                 if ((err = register_netdev(dev))) {
498                         free_netdev(dev);
499                         break;
500                 }
501
502                 master = netdev_priv(dev);
503
504                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
505                 err = register_qdisc(&master->qops);
506
507                 if (err) {
508                         unregister_netdev(dev);
509                         free_netdev(dev);
510                         break;
511                 }
512
513                 list_add_tail(&master->master_list, &master_dev_list);
514         }
515         return i ? 0 : err;
516 }
517
518 static void __exit teql_exit(void)
519 {
520         struct teql_master *master, *nxt;
521
522         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
523
524                 list_del(&master->master_list);
525
526                 unregister_qdisc(&master->qops);
527                 unregister_netdev(master->dev);
528                 free_netdev(master->dev);
529         }
530 }
531
532 module_init(teql_init);
533 module_exit(teql_exit);
534
535 MODULE_LICENSE("GPL");