Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
[cascardo/linux.git] / drivers / net / team / team_mode_loadbalance.c
1 /*
2  * drivers/net/team/team_mode_loadbalance.c - Load-balancing mode for team
3  * Copyright (c) 2012 Jiri Pirko <jpirko@redhat.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  */
10
11 #include <linux/kernel.h>
12 #include <linux/types.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
16 #include <linux/netdevice.h>
17 #include <linux/filter.h>
18 #include <linux/if_team.h>
19
20 struct lb_priv;
21
/*
 * Signature shared by every tx port selector: given the team, the mode
 * private data, the skb being sent and its one-byte hash, return the port
 * to transmit on.  lb_transmit() drops the skb when NULL is returned.
 */
typedef struct team_port *lb_select_tx_port_func_t(struct team *team,
						   struct lb_priv *lb_priv,
						   struct sk_buff *skb,
						   unsigned char hash);
26
/* The tx hash is carried in an unsigned char, hence one slot per value. */
#define LB_TX_HASHTABLE_SIZE 256
28
29 struct lb_stats {
30         u64 tx_bytes;
31 };
32
33 struct lb_pcpu_stats {
34         struct lb_stats hash_stats[LB_TX_HASHTABLE_SIZE];
35         struct u64_stats_sync syncp;
36 };
37
38 struct lb_stats_info {
39         struct lb_stats stats;
40         struct lb_stats last_stats;
41         struct team_option_inst_info *opt_inst_info;
42 };
43
44 struct lb_port_mapping {
45         struct team_port __rcu *port;
46         struct team_option_inst_info *opt_inst_info;
47 };
48
49 struct lb_priv_ex {
50         struct team *team;
51         struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE];
52         struct sock_fprog *orig_fprog;
53         struct {
54                 unsigned int refresh_interval; /* in tenths of second */
55                 struct delayed_work refresh_dw;
56                 struct lb_stats_info info[LB_TX_HASHTABLE_SIZE];
57         } stats;
58 };
59
60 struct lb_priv {
61         struct sk_filter __rcu *fp;
62         lb_select_tx_port_func_t __rcu *select_tx_port_func;
63         struct lb_pcpu_stats __percpu *pcpu_stats;
64         struct lb_priv_ex *ex; /* priv extension */
65 };
66
67 static struct lb_priv *get_lb_priv(struct team *team)
68 {
69         return (struct lb_priv *) &team->mode_priv;
70 }
71
72 struct lb_port_priv {
73         struct lb_stats __percpu *pcpu_stats;
74         struct lb_stats_info stats_info;
75 };
76
77 static struct lb_port_priv *get_lb_port_priv(struct team_port *port)
78 {
79         return (struct lb_port_priv *) &port->mode_priv;
80 }
81
/*
 * Accessors for one slot of the hash -> port mapping table.  Both expand
 * to an lvalue so they can be assigned to or passed to the RCU helpers.
 *
 * The parameter is really named lb_priv now: it used to be misspelled, so
 * the expansion silently picked up whatever variable called "lb_priv" was
 * in the caller's scope instead of the argument.
 */
#define LB_HTPM_PORT_BY_HASH(lb_priv, hash) \
	((lb_priv)->ex->tx_hash_to_port_mapping[(hash)].port)

#define LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) \
	((lb_priv)->ex->tx_hash_to_port_mapping[(hash)].opt_inst_info)
87
88 static void lb_tx_hash_to_port_mapping_null_port(struct team *team,
89                                                  struct team_port *port)
90 {
91         struct lb_priv *lb_priv = get_lb_priv(team);
92         bool changed = false;
93         int i;
94
95         for (i = 0; i < LB_TX_HASHTABLE_SIZE; i++) {
96                 struct lb_port_mapping *pm;
97
98                 pm = &lb_priv->ex->tx_hash_to_port_mapping[i];
99                 if (rcu_access_pointer(pm->port) == port) {
100                         RCU_INIT_POINTER(pm->port, NULL);
101                         team_option_inst_set_change(pm->opt_inst_info);
102                         changed = true;
103                 }
104         }
105         if (changed)
106                 team_options_change_check(team);
107 }
108
109 /* Basic tx selection based solely by hash */
110 static struct team_port *lb_hash_select_tx_port(struct team *team,
111                                                 struct lb_priv *lb_priv,
112                                                 struct sk_buff *skb,
113                                                 unsigned char hash)
114 {
115         int port_index;
116
117         port_index = hash % team->en_port_count;
118         return team_get_port_by_index_rcu(team, port_index);
119 }
120
121 /* Hash to port mapping select tx port */
122 static struct team_port *lb_htpm_select_tx_port(struct team *team,
123                                                 struct lb_priv *lb_priv,
124                                                 struct sk_buff *skb,
125                                                 unsigned char hash)
126 {
127         return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
128 }
129
130 struct lb_select_tx_port {
131         char *name;
132         lb_select_tx_port_func_t *func;
133 };
134
135 static const struct lb_select_tx_port lb_select_tx_port_list[] = {
136         {
137                 .name = "hash",
138                 .func = lb_hash_select_tx_port,
139         },
140         {
141                 .name = "hash_to_port_mapping",
142                 .func = lb_htpm_select_tx_port,
143         },
144 };
145 #define LB_SELECT_TX_PORT_LIST_COUNT ARRAY_SIZE(lb_select_tx_port_list)
146
147 static char *lb_select_tx_port_get_name(lb_select_tx_port_func_t *func)
148 {
149         int i;
150
151         for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
152                 const struct lb_select_tx_port *item;
153
154                 item = &lb_select_tx_port_list[i];
155                 if (item->func == func)
156                         return item->name;
157         }
158         return NULL;
159 }
160
161 static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name)
162 {
163         int i;
164
165         for (i = 0; i < LB_SELECT_TX_PORT_LIST_COUNT; i++) {
166                 const struct lb_select_tx_port *item;
167
168                 item = &lb_select_tx_port_list[i];
169                 if (!strcmp(item->name, name))
170                         return item->func;
171         }
172         return NULL;
173 }
174
175 static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
176                                     struct sk_buff *skb)
177 {
178         struct sk_filter *fp;
179         uint32_t lhash;
180         unsigned char *c;
181
182         fp = rcu_dereference_bh(lb_priv->fp);
183         if (unlikely(!fp))
184                 return 0;
185         lhash = SK_RUN_FILTER(fp, skb);
186         c = (char *) &lhash;
187         return c[0] ^ c[1] ^ c[2] ^ c[3];
188 }
189
190 static void lb_update_tx_stats(unsigned int tx_bytes, struct lb_priv *lb_priv,
191                                struct lb_port_priv *lb_port_priv,
192                                unsigned char hash)
193 {
194         struct lb_pcpu_stats *pcpu_stats;
195         struct lb_stats *port_stats;
196         struct lb_stats *hash_stats;
197
198         pcpu_stats = this_cpu_ptr(lb_priv->pcpu_stats);
199         port_stats = this_cpu_ptr(lb_port_priv->pcpu_stats);
200         hash_stats = &pcpu_stats->hash_stats[hash];
201         u64_stats_update_begin(&pcpu_stats->syncp);
202         port_stats->tx_bytes += tx_bytes;
203         hash_stats->tx_bytes += tx_bytes;
204         u64_stats_update_end(&pcpu_stats->syncp);
205 }
206
207 static bool lb_transmit(struct team *team, struct sk_buff *skb)
208 {
209         struct lb_priv *lb_priv = get_lb_priv(team);
210         lb_select_tx_port_func_t *select_tx_port_func;
211         struct team_port *port;
212         unsigned char hash;
213         unsigned int tx_bytes = skb->len;
214
215         hash = lb_get_skb_hash(lb_priv, skb);
216         select_tx_port_func = rcu_dereference_bh(lb_priv->select_tx_port_func);
217         port = select_tx_port_func(team, lb_priv, skb, hash);
218         if (unlikely(!port))
219                 goto drop;
220         if (team_dev_queue_xmit(team, port, skb))
221                 return false;
222         lb_update_tx_stats(tx_bytes, lb_priv, get_lb_port_priv(port), hash);
223         return true;
224
225 drop:
226         dev_kfree_skb_any(skb);
227         return false;
228 }
229
230 static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx)
231 {
232         struct lb_priv *lb_priv = get_lb_priv(team);
233
234         if (!lb_priv->ex->orig_fprog) {
235                 ctx->data.bin_val.len = 0;
236                 ctx->data.bin_val.ptr = NULL;
237                 return 0;
238         }
239         ctx->data.bin_val.len = lb_priv->ex->orig_fprog->len *
240                                 sizeof(struct sock_filter);
241         ctx->data.bin_val.ptr = lb_priv->ex->orig_fprog->filter;
242         return 0;
243 }
244
245 static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
246                           const void *data)
247 {
248         struct sock_fprog *fprog;
249         struct sock_filter *filter = (struct sock_filter *) data;
250
251         if (data_len % sizeof(struct sock_filter))
252                 return -EINVAL;
253         fprog = kmalloc(sizeof(struct sock_fprog), GFP_KERNEL);
254         if (!fprog)
255                 return -ENOMEM;
256         fprog->filter = kmemdup(filter, data_len, GFP_KERNEL);
257         if (!fprog->filter) {
258                 kfree(fprog);
259                 return -ENOMEM;
260         }
261         fprog->len = data_len / sizeof(struct sock_filter);
262         *pfprog = fprog;
263         return 0;
264 }
265
266 static void __fprog_destroy(struct sock_fprog *fprog)
267 {
268         kfree(fprog->filter);
269         kfree(fprog);
270 }
271
272 static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
273 {
274         struct lb_priv *lb_priv = get_lb_priv(team);
275         struct sk_filter *fp = NULL;
276         struct sk_filter *orig_fp;
277         struct sock_fprog *fprog = NULL;
278         int err;
279
280         if (ctx->data.bin_val.len) {
281                 err = __fprog_create(&fprog, ctx->data.bin_val.len,
282                                      ctx->data.bin_val.ptr);
283                 if (err)
284                         return err;
285                 err = sk_unattached_filter_create(&fp, fprog);
286                 if (err) {
287                         __fprog_destroy(fprog);
288                         return err;
289                 }
290         }
291
292         if (lb_priv->ex->orig_fprog) {
293                 /* Clear old filter data */
294                 __fprog_destroy(lb_priv->ex->orig_fprog);
295                 orig_fp = rcu_dereference_protected(lb_priv->fp,
296                                                 lockdep_is_held(&team->lock));
297                 sk_unattached_filter_destroy(orig_fp);
298         }
299
300         rcu_assign_pointer(lb_priv->fp, fp);
301         lb_priv->ex->orig_fprog = fprog;
302         return 0;
303 }
304
305 static int lb_tx_method_get(struct team *team, struct team_gsetter_ctx *ctx)
306 {
307         struct lb_priv *lb_priv = get_lb_priv(team);
308         lb_select_tx_port_func_t *func;
309         char *name;
310
311         func = rcu_dereference_protected(lb_priv->select_tx_port_func,
312                                          lockdep_is_held(&team->lock));
313         name = lb_select_tx_port_get_name(func);
314         BUG_ON(!name);
315         ctx->data.str_val = name;
316         return 0;
317 }
318
319 static int lb_tx_method_set(struct team *team, struct team_gsetter_ctx *ctx)
320 {
321         struct lb_priv *lb_priv = get_lb_priv(team);
322         lb_select_tx_port_func_t *func;
323
324         func = lb_select_tx_port_get_func(ctx->data.str_val);
325         if (!func)
326                 return -EINVAL;
327         rcu_assign_pointer(lb_priv->select_tx_port_func, func);
328         return 0;
329 }
330
331 static int lb_tx_hash_to_port_mapping_init(struct team *team,
332                                            struct team_option_inst_info *info)
333 {
334         struct lb_priv *lb_priv = get_lb_priv(team);
335         unsigned char hash = info->array_index;
336
337         LB_HTPM_OPT_INST_INFO_BY_HASH(lb_priv, hash) = info;
338         return 0;
339 }
340
341 static int lb_tx_hash_to_port_mapping_get(struct team *team,
342                                           struct team_gsetter_ctx *ctx)
343 {
344         struct lb_priv *lb_priv = get_lb_priv(team);
345         struct team_port *port;
346         unsigned char hash = ctx->info->array_index;
347
348         port = LB_HTPM_PORT_BY_HASH(lb_priv, hash);
349         ctx->data.u32_val = port ? port->dev->ifindex : 0;
350         return 0;
351 }
352
353 static int lb_tx_hash_to_port_mapping_set(struct team *team,
354                                           struct team_gsetter_ctx *ctx)
355 {
356         struct lb_priv *lb_priv = get_lb_priv(team);
357         struct team_port *port;
358         unsigned char hash = ctx->info->array_index;
359
360         list_for_each_entry(port, &team->port_list, list) {
361                 if (ctx->data.u32_val == port->dev->ifindex &&
362                     team_port_enabled(port)) {
363                         rcu_assign_pointer(LB_HTPM_PORT_BY_HASH(lb_priv, hash),
364                                            port);
365                         return 0;
366                 }
367         }
368         return -ENODEV;
369 }
370
371 static int lb_hash_stats_init(struct team *team,
372                               struct team_option_inst_info *info)
373 {
374         struct lb_priv *lb_priv = get_lb_priv(team);
375         unsigned char hash = info->array_index;
376
377         lb_priv->ex->stats.info[hash].opt_inst_info = info;
378         return 0;
379 }
380
381 static int lb_hash_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
382 {
383         struct lb_priv *lb_priv = get_lb_priv(team);
384         unsigned char hash = ctx->info->array_index;
385
386         ctx->data.bin_val.ptr = &lb_priv->ex->stats.info[hash].stats;
387         ctx->data.bin_val.len = sizeof(struct lb_stats);
388         return 0;
389 }
390
391 static int lb_port_stats_init(struct team *team,
392                               struct team_option_inst_info *info)
393 {
394         struct team_port *port = info->port;
395         struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
396
397         lb_port_priv->stats_info.opt_inst_info = info;
398         return 0;
399 }
400
401 static int lb_port_stats_get(struct team *team, struct team_gsetter_ctx *ctx)
402 {
403         struct team_port *port = ctx->info->port;
404         struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
405
406         ctx->data.bin_val.ptr = &lb_port_priv->stats_info.stats;
407         ctx->data.bin_val.len = sizeof(struct lb_stats);
408         return 0;
409 }
410
411 static void __lb_stats_info_refresh_prepare(struct lb_stats_info *s_info)
412 {
413         memcpy(&s_info->last_stats, &s_info->stats, sizeof(struct lb_stats));
414         memset(&s_info->stats, 0, sizeof(struct lb_stats));
415 }
416
417 static bool __lb_stats_info_refresh_check(struct lb_stats_info *s_info,
418                                           struct team *team)
419 {
420         if (memcmp(&s_info->last_stats, &s_info->stats,
421             sizeof(struct lb_stats))) {
422                 team_option_inst_set_change(s_info->opt_inst_info);
423                 return true;
424         }
425         return false;
426 }
427
428 static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats,
429                                    struct lb_stats *cpu_stats,
430                                    struct u64_stats_sync *syncp)
431 {
432         unsigned int start;
433         struct lb_stats tmp;
434
435         do {
436                 start = u64_stats_fetch_begin_bh(syncp);
437                 tmp.tx_bytes = cpu_stats->tx_bytes;
438         } while (u64_stats_fetch_retry_bh(syncp, start));
439         acc_stats->tx_bytes += tmp.tx_bytes;
440 }
441
442 static void lb_stats_refresh(struct work_struct *work)
443 {
444         struct team *team;
445         struct lb_priv *lb_priv;
446         struct lb_priv_ex *lb_priv_ex;
447         struct lb_pcpu_stats *pcpu_stats;
448         struct lb_stats *stats;
449         struct lb_stats_info *s_info;
450         struct team_port *port;
451         bool changed = false;
452         int i;
453         int j;
454
455         lb_priv_ex = container_of(work, struct lb_priv_ex,
456                                   stats.refresh_dw.work);
457
458         team = lb_priv_ex->team;
459         lb_priv = get_lb_priv(team);
460
461         if (!mutex_trylock(&team->lock)) {
462                 schedule_delayed_work(&lb_priv_ex->stats.refresh_dw, 0);
463                 return;
464         }
465
466         for (j = 0; j < LB_TX_HASHTABLE_SIZE; j++) {
467                 s_info = &lb_priv->ex->stats.info[j];
468                 __lb_stats_info_refresh_prepare(s_info);
469                 for_each_possible_cpu(i) {
470                         pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
471                         stats = &pcpu_stats->hash_stats[j];
472                         __lb_one_cpu_stats_add(&s_info->stats, stats,
473                                                &pcpu_stats->syncp);
474                 }
475                 changed |= __lb_stats_info_refresh_check(s_info, team);
476         }
477
478         list_for_each_entry(port, &team->port_list, list) {
479                 struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
480
481                 s_info = &lb_port_priv->stats_info;
482                 __lb_stats_info_refresh_prepare(s_info);
483                 for_each_possible_cpu(i) {
484                         pcpu_stats = per_cpu_ptr(lb_priv->pcpu_stats, i);
485                         stats = per_cpu_ptr(lb_port_priv->pcpu_stats, i);
486                         __lb_one_cpu_stats_add(&s_info->stats, stats,
487                                                &pcpu_stats->syncp);
488                 }
489                 changed |= __lb_stats_info_refresh_check(s_info, team);
490         }
491
492         if (changed)
493                 team_options_change_check(team);
494
495         schedule_delayed_work(&lb_priv_ex->stats.refresh_dw,
496                               (lb_priv_ex->stats.refresh_interval * HZ) / 10);
497
498         mutex_unlock(&team->lock);
499 }
500
501 static int lb_stats_refresh_interval_get(struct team *team,
502                                          struct team_gsetter_ctx *ctx)
503 {
504         struct lb_priv *lb_priv = get_lb_priv(team);
505
506         ctx->data.u32_val = lb_priv->ex->stats.refresh_interval;
507         return 0;
508 }
509
510 static int lb_stats_refresh_interval_set(struct team *team,
511                                          struct team_gsetter_ctx *ctx)
512 {
513         struct lb_priv *lb_priv = get_lb_priv(team);
514         unsigned int interval;
515
516         interval = ctx->data.u32_val;
517         if (lb_priv->ex->stats.refresh_interval == interval)
518                 return 0;
519         lb_priv->ex->stats.refresh_interval = interval;
520         if (interval)
521                 schedule_delayed_work(&lb_priv->ex->stats.refresh_dw, 0);
522         else
523                 cancel_delayed_work(&lb_priv->ex->stats.refresh_dw);
524         return 0;
525 }
526
527 static const struct team_option lb_options[] = {
528         {
529                 .name = "bpf_hash_func",
530                 .type = TEAM_OPTION_TYPE_BINARY,
531                 .getter = lb_bpf_func_get,
532                 .setter = lb_bpf_func_set,
533         },
534         {
535                 .name = "lb_tx_method",
536                 .type = TEAM_OPTION_TYPE_STRING,
537                 .getter = lb_tx_method_get,
538                 .setter = lb_tx_method_set,
539         },
540         {
541                 .name = "lb_tx_hash_to_port_mapping",
542                 .array_size = LB_TX_HASHTABLE_SIZE,
543                 .type = TEAM_OPTION_TYPE_U32,
544                 .init = lb_tx_hash_to_port_mapping_init,
545                 .getter = lb_tx_hash_to_port_mapping_get,
546                 .setter = lb_tx_hash_to_port_mapping_set,
547         },
548         {
549                 .name = "lb_hash_stats",
550                 .array_size = LB_TX_HASHTABLE_SIZE,
551                 .type = TEAM_OPTION_TYPE_BINARY,
552                 .init = lb_hash_stats_init,
553                 .getter = lb_hash_stats_get,
554         },
555         {
556                 .name = "lb_port_stats",
557                 .per_port = true,
558                 .type = TEAM_OPTION_TYPE_BINARY,
559                 .init = lb_port_stats_init,
560                 .getter = lb_port_stats_get,
561         },
562         {
563                 .name = "lb_stats_refresh_interval",
564                 .type = TEAM_OPTION_TYPE_U32,
565                 .getter = lb_stats_refresh_interval_get,
566                 .setter = lb_stats_refresh_interval_set,
567         },
568 };
569
570 static int lb_init(struct team *team)
571 {
572         struct lb_priv *lb_priv = get_lb_priv(team);
573         lb_select_tx_port_func_t *func;
574         int err;
575
576         /* set default tx port selector */
577         func = lb_select_tx_port_get_func("hash");
578         BUG_ON(!func);
579         rcu_assign_pointer(lb_priv->select_tx_port_func, func);
580
581         lb_priv->ex = kzalloc(sizeof(*lb_priv->ex), GFP_KERNEL);
582         if (!lb_priv->ex)
583                 return -ENOMEM;
584         lb_priv->ex->team = team;
585
586         lb_priv->pcpu_stats = alloc_percpu(struct lb_pcpu_stats);
587         if (!lb_priv->pcpu_stats) {
588                 err = -ENOMEM;
589                 goto err_alloc_pcpu_stats;
590         }
591
592         INIT_DELAYED_WORK(&lb_priv->ex->stats.refresh_dw, lb_stats_refresh);
593
594         err = team_options_register(team, lb_options, ARRAY_SIZE(lb_options));
595         if (err)
596                 goto err_options_register;
597         return 0;
598
599 err_options_register:
600         free_percpu(lb_priv->pcpu_stats);
601 err_alloc_pcpu_stats:
602         kfree(lb_priv->ex);
603         return err;
604 }
605
606 static void lb_exit(struct team *team)
607 {
608         struct lb_priv *lb_priv = get_lb_priv(team);
609
610         team_options_unregister(team, lb_options,
611                                 ARRAY_SIZE(lb_options));
612         cancel_delayed_work_sync(&lb_priv->ex->stats.refresh_dw);
613         free_percpu(lb_priv->pcpu_stats);
614         kfree(lb_priv->ex);
615 }
616
617 static int lb_port_enter(struct team *team, struct team_port *port)
618 {
619         struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
620
621         lb_port_priv->pcpu_stats = alloc_percpu(struct lb_stats);
622         if (!lb_port_priv->pcpu_stats)
623                 return -ENOMEM;
624         return 0;
625 }
626
627 static void lb_port_leave(struct team *team, struct team_port *port)
628 {
629         struct lb_port_priv *lb_port_priv = get_lb_port_priv(port);
630
631         free_percpu(lb_port_priv->pcpu_stats);
632 }
633
/* Port disabled hook: drop every hash mapping that points at @port. */
static void lb_port_disabled(struct team *team, struct team_port *port)
{
	lb_tx_hash_to_port_mapping_null_port(team, port);
}
638
639 static const struct team_mode_ops lb_mode_ops = {
640         .init                   = lb_init,
641         .exit                   = lb_exit,
642         .port_enter             = lb_port_enter,
643         .port_leave             = lb_port_leave,
644         .port_disabled          = lb_port_disabled,
645         .transmit               = lb_transmit,
646 };
647
648 static const struct team_mode lb_mode = {
649         .kind           = "loadbalance",
650         .owner          = THIS_MODULE,
651         .priv_size      = sizeof(struct lb_priv),
652         .port_priv_size = sizeof(struct lb_port_priv),
653         .ops            = &lb_mode_ops,
654 };
655
656 static int __init lb_init_module(void)
657 {
658         return team_mode_register(&lb_mode);
659 }
660
661 static void __exit lb_cleanup_module(void)
662 {
663         team_mode_unregister(&lb_mode);
664 }
665
666 module_init(lb_init_module);
667 module_exit(lb_cleanup_module);
668
669 MODULE_LICENSE("GPL v2");
670 MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
671 MODULE_DESCRIPTION("Load-balancing mode for team");
672 MODULE_ALIAS("team-mode-loadbalance");