Merge tag 'gcc-plugins-v4.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/* Mutex for IPVS sockopts; [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* sysctl variables */
59
60 #ifdef CONFIG_IP_VS_DEBUG
/*
 * Debug verbosity level.  File-scope statics are zero-initialised by the
 * language, so no explicit "= 0" initialiser is needed.
 */
static int sysctl_ip_vs_debug_level;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
67 #endif
68
69
70 /*  Protos */
71 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
72
73
74 #ifdef CONFIG_IP_VS_IPV6
75 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
76 static bool __ip_vs_addr_is_local_v6(struct net *net,
77                                      const struct in6_addr *addr)
78 {
79         struct flowi6 fl6 = {
80                 .daddr = *addr,
81         };
82         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
83         bool is_local;
84
85         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
86
87         dst_release(dst);
88         return is_local;
89 }
90 #endif
91
92 #ifdef CONFIG_SYSCTL
93 /*
94  *      update_defense_level is called from keventd and from sysctl,
95  *      so it needs to protect itself from softirqs
96  */
97 static void update_defense_level(struct netns_ipvs *ipvs)
98 {
99         struct sysinfo i;
100         static int old_secure_tcp = 0;
101         int availmem;
102         int nomem;
103         int to_change = -1;
104
105         /* we only count free and buffered memory (in pages) */
106         si_meminfo(&i);
107         availmem = i.freeram + i.bufferram;
108         /* however in linux 2.5 the i.bufferram is total page cache size,
109            we need adjust it */
110         /* si_swapinfo(&i); */
111         /* availmem = availmem - (i.totalswap - i.freeswap); */
112
113         nomem = (availmem < ipvs->sysctl_amemthresh);
114
115         local_bh_disable();
116
117         /* drop_entry */
118         spin_lock(&ipvs->dropentry_lock);
119         switch (ipvs->sysctl_drop_entry) {
120         case 0:
121                 atomic_set(&ipvs->dropentry, 0);
122                 break;
123         case 1:
124                 if (nomem) {
125                         atomic_set(&ipvs->dropentry, 1);
126                         ipvs->sysctl_drop_entry = 2;
127                 } else {
128                         atomic_set(&ipvs->dropentry, 0);
129                 }
130                 break;
131         case 2:
132                 if (nomem) {
133                         atomic_set(&ipvs->dropentry, 1);
134                 } else {
135                         atomic_set(&ipvs->dropentry, 0);
136                         ipvs->sysctl_drop_entry = 1;
137                 };
138                 break;
139         case 3:
140                 atomic_set(&ipvs->dropentry, 1);
141                 break;
142         }
143         spin_unlock(&ipvs->dropentry_lock);
144
145         /* drop_packet */
146         spin_lock(&ipvs->droppacket_lock);
147         switch (ipvs->sysctl_drop_packet) {
148         case 0:
149                 ipvs->drop_rate = 0;
150                 break;
151         case 1:
152                 if (nomem) {
153                         ipvs->drop_rate = ipvs->drop_counter
154                                 = ipvs->sysctl_amemthresh /
155                                 (ipvs->sysctl_amemthresh-availmem);
156                         ipvs->sysctl_drop_packet = 2;
157                 } else {
158                         ipvs->drop_rate = 0;
159                 }
160                 break;
161         case 2:
162                 if (nomem) {
163                         ipvs->drop_rate = ipvs->drop_counter
164                                 = ipvs->sysctl_amemthresh /
165                                 (ipvs->sysctl_amemthresh-availmem);
166                 } else {
167                         ipvs->drop_rate = 0;
168                         ipvs->sysctl_drop_packet = 1;
169                 }
170                 break;
171         case 3:
172                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
173                 break;
174         }
175         spin_unlock(&ipvs->droppacket_lock);
176
177         /* secure_tcp */
178         spin_lock(&ipvs->securetcp_lock);
179         switch (ipvs->sysctl_secure_tcp) {
180         case 0:
181                 if (old_secure_tcp >= 2)
182                         to_change = 0;
183                 break;
184         case 1:
185                 if (nomem) {
186                         if (old_secure_tcp < 2)
187                                 to_change = 1;
188                         ipvs->sysctl_secure_tcp = 2;
189                 } else {
190                         if (old_secure_tcp >= 2)
191                                 to_change = 0;
192                 }
193                 break;
194         case 2:
195                 if (nomem) {
196                         if (old_secure_tcp < 2)
197                                 to_change = 1;
198                 } else {
199                         if (old_secure_tcp >= 2)
200                                 to_change = 0;
201                         ipvs->sysctl_secure_tcp = 1;
202                 }
203                 break;
204         case 3:
205                 if (old_secure_tcp < 2)
206                         to_change = 1;
207                 break;
208         }
209         old_secure_tcp = ipvs->sysctl_secure_tcp;
210         if (to_change >= 0)
211                 ip_vs_protocol_timeout_change(ipvs,
212                                               ipvs->sysctl_secure_tcp > 1);
213         spin_unlock(&ipvs->securetcp_lock);
214
215         local_bh_enable();
216 }
217
218
219 /*
220  *      Timer for checking the defense
221  */
222 #define DEFENSE_TIMER_PERIOD    1*HZ
223
224 static void defense_work_handler(struct work_struct *work)
225 {
226         struct netns_ipvs *ipvs =
227                 container_of(work, struct netns_ipvs, defense_work.work);
228
229         update_defense_level(ipvs);
230         if (atomic_read(&ipvs->dropentry))
231                 ip_vs_random_dropentry(ipvs);
232         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
233 }
234 #endif
235
236 int
237 ip_vs_use_count_inc(void)
238 {
239         return try_module_get(THIS_MODULE);
240 }
241
242 void
243 ip_vs_use_count_dec(void)
244 {
245         module_put(THIS_MODULE);
246 }
247
248
249 /*
250  *      Hash table: for virtual service lookups
251  */
252 #define IP_VS_SVC_TAB_BITS 8
253 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
254 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
255
256 /* the service table hashed by <protocol, addr, port> */
257 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
258 /* the service table hashed by fwmark */
259 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
260
261
262 /*
263  *      Returns hash value for virtual service
264  */
265 static inline unsigned int
266 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
267                   const union nf_inet_addr *addr, __be16 port)
268 {
269         register unsigned int porth = ntohs(port);
270         __be32 addr_fold = addr->ip;
271         __u32 ahash;
272
273 #ifdef CONFIG_IP_VS_IPV6
274         if (af == AF_INET6)
275                 addr_fold = addr->ip6[0]^addr->ip6[1]^
276                             addr->ip6[2]^addr->ip6[3];
277 #endif
278         ahash = ntohl(addr_fold);
279         ahash ^= ((size_t) ipvs >> 8);
280
281         return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
282                IP_VS_SVC_TAB_MASK;
283 }
284
285 /*
286  *      Returns hash value of fwmark for virtual service lookup
287  */
288 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
289 {
290         return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
291 }
292
293 /*
294  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
295  *      or in the ip_vs_svc_fwm_table by fwmark.
296  *      Should be called with locked tables.
297  */
298 static int ip_vs_svc_hash(struct ip_vs_service *svc)
299 {
300         unsigned int hash;
301
302         if (svc->flags & IP_VS_SVC_F_HASHED) {
303                 pr_err("%s(): request for already hashed, called from %pF\n",
304                        __func__, __builtin_return_address(0));
305                 return 0;
306         }
307
308         if (svc->fwmark == 0) {
309                 /*
310                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
311                  */
312                 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
313                                          &svc->addr, svc->port);
314                 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
315         } else {
316                 /*
317                  *  Hash it by fwmark in svc_fwm_table
318                  */
319                 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
320                 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
321         }
322
323         svc->flags |= IP_VS_SVC_F_HASHED;
324         /* increase its refcnt because it is referenced by the svc table */
325         atomic_inc(&svc->refcnt);
326         return 1;
327 }
328
329
330 /*
331  *      Unhashes a service from svc_table / svc_fwm_table.
332  *      Should be called with locked tables.
333  */
334 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
335 {
336         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
337                 pr_err("%s(): request for unhash flagged, called from %pF\n",
338                        __func__, __builtin_return_address(0));
339                 return 0;
340         }
341
342         if (svc->fwmark == 0) {
343                 /* Remove it from the svc_table table */
344                 hlist_del_rcu(&svc->s_list);
345         } else {
346                 /* Remove it from the svc_fwm_table table */
347                 hlist_del_rcu(&svc->f_list);
348         }
349
350         svc->flags &= ~IP_VS_SVC_F_HASHED;
351         atomic_dec(&svc->refcnt);
352         return 1;
353 }
354
355
356 /*
357  *      Get service by {netns, proto,addr,port} in the service table.
358  */
359 static inline struct ip_vs_service *
360 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
361                      const union nf_inet_addr *vaddr, __be16 vport)
362 {
363         unsigned int hash;
364         struct ip_vs_service *svc;
365
366         /* Check for "full" addressed entries */
367         hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
368
369         hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
370                 if ((svc->af == af)
371                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
372                     && (svc->port == vport)
373                     && (svc->protocol == protocol)
374                     && (svc->ipvs == ipvs)) {
375                         /* HIT */
376                         return svc;
377                 }
378         }
379
380         return NULL;
381 }
382
383
384 /*
385  *      Get service by {fwmark} in the service table.
386  */
387 static inline struct ip_vs_service *
388 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
389 {
390         unsigned int hash;
391         struct ip_vs_service *svc;
392
393         /* Check for fwmark addressed entries */
394         hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
395
396         hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
397                 if (svc->fwmark == fwmark && svc->af == af
398                     && (svc->ipvs == ipvs)) {
399                         /* HIT */
400                         return svc;
401                 }
402         }
403
404         return NULL;
405 }
406
407 /* Find service, called under RCU lock */
408 struct ip_vs_service *
409 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
410                    const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413
414         /*
415          *      Check the table hashed by fwmark first
416          */
417         if (fwmark) {
418                 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
419                 if (svc)
420                         goto out;
421         }
422
423         /*
424          *      Check the table hashed by <protocol,addr,port>
425          *      for "full" addressed entries
426          */
427         svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
428
429         if (svc == NULL
430             && protocol == IPPROTO_TCP
431             && atomic_read(&ipvs->ftpsvc_counter)
432             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
433                 /*
434                  * Check if ftp service entry exists, the packet
435                  * might belong to FTP data connections.
436                  */
437                 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
438         }
439
440         if (svc == NULL
441             && atomic_read(&ipvs->nullsvc_counter)) {
442                 /*
443                  * Check if the catch-all port (port zero) exists
444                  */
445                 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
446         }
447
448   out:
449         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
450                       fwmark, ip_vs_proto_name(protocol),
451                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
452                       svc ? "hit" : "not hit");
453
454         return svc;
455 }
456
457
458 static inline void
459 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
460 {
461         atomic_inc(&svc->refcnt);
462         rcu_assign_pointer(dest->svc, svc);
463 }
464
465 static void ip_vs_service_free(struct ip_vs_service *svc)
466 {
467         free_percpu(svc->stats.cpustats);
468         kfree(svc);
469 }
470
471 static void ip_vs_service_rcu_free(struct rcu_head *head)
472 {
473         struct ip_vs_service *svc;
474
475         svc = container_of(head, struct ip_vs_service, rcu_head);
476         ip_vs_service_free(svc);
477 }
478
479 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
480 {
481         if (atomic_dec_and_test(&svc->refcnt)) {
482                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
483                               svc->fwmark,
484                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
485                               ntohs(svc->port));
486                 if (do_delay)
487                         call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
488                 else
489                         ip_vs_service_free(svc);
490         }
491 }
492
493
494 /*
495  *      Returns hash value for real service
496  */
497 static inline unsigned int ip_vs_rs_hashkey(int af,
498                                             const union nf_inet_addr *addr,
499                                             __be16 port)
500 {
501         register unsigned int porth = ntohs(port);
502         __be32 addr_fold = addr->ip;
503
504 #ifdef CONFIG_IP_VS_IPV6
505         if (af == AF_INET6)
506                 addr_fold = addr->ip6[0]^addr->ip6[1]^
507                             addr->ip6[2]^addr->ip6[3];
508 #endif
509
510         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
511                 & IP_VS_RTAB_MASK;
512 }
513
514 /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
515 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
516 {
517         unsigned int hash;
518
519         if (dest->in_rs_table)
520                 return;
521
522         /*
523          *      Hash by proto,addr,port,
524          *      which are the parameters of the real service.
525          */
526         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
527
528         hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
529         dest->in_rs_table = 1;
530 }
531
532 /* Unhash ip_vs_dest from rs_table. */
533 static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
534 {
535         /*
536          * Remove it from the rs_table table.
537          */
538         if (dest->in_rs_table) {
539                 hlist_del_rcu(&dest->d_list);
540                 dest->in_rs_table = 0;
541         }
542 }
543
544 /* Check if real service by <proto,addr,port> is present */
545 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
546                             const union nf_inet_addr *daddr, __be16 dport)
547 {
548         unsigned int hash;
549         struct ip_vs_dest *dest;
550
551         /* Check for "full" addressed entries */
552         hash = ip_vs_rs_hashkey(af, daddr, dport);
553
554         rcu_read_lock();
555         hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
556                 if (dest->port == dport &&
557                     dest->af == af &&
558                     ip_vs_addr_equal(af, &dest->addr, daddr) &&
559                     (dest->protocol == protocol || dest->vfwmark)) {
560                         /* HIT */
561                         rcu_read_unlock();
562                         return true;
563                 }
564         }
565         rcu_read_unlock();
566
567         return false;
568 }
569
570 /* Find real service record by <proto,addr,port>.
571  * In case of multiple records with the same <proto,addr,port>, only
572  * the first found record is returned.
573  *
574  * To be called under RCU lock.
575  */
576 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
577                                            __u16 protocol,
578                                            const union nf_inet_addr *daddr,
579                                            __be16 dport)
580 {
581         unsigned int hash;
582         struct ip_vs_dest *dest;
583
584         /* Check for "full" addressed entries */
585         hash = ip_vs_rs_hashkey(af, daddr, dport);
586
587         hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
588                 if (dest->port == dport &&
589                     dest->af == af &&
590                     ip_vs_addr_equal(af, &dest->addr, daddr) &&
591                         (dest->protocol == protocol || dest->vfwmark)) {
592                         /* HIT */
593                         return dest;
594                 }
595         }
596
597         return NULL;
598 }
599
600 /* Lookup destination by {addr,port} in the given service
601  * Called under RCU lock.
602  */
603 static struct ip_vs_dest *
604 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
605                   const union nf_inet_addr *daddr, __be16 dport)
606 {
607         struct ip_vs_dest *dest;
608
609         /*
610          * Find the destination for the given service
611          */
612         list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
613                 if ((dest->af == dest_af) &&
614                     ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
615                     (dest->port == dport)) {
616                         /* HIT */
617                         return dest;
618                 }
619         }
620
621         return NULL;
622 }
623
624 /*
625  * Find destination by {daddr,dport,vaddr,protocol}
626  * Created to be used in ip_vs_process_message() in
627  * the backup synchronization daemon. It finds the
628  * destination to be bound to the received connection
629  * on the backup.
630  * Called under RCU lock, no refcnt is returned.
631  */
632 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
633                                    const union nf_inet_addr *daddr,
634                                    __be16 dport,
635                                    const union nf_inet_addr *vaddr,
636                                    __be16 vport, __u16 protocol, __u32 fwmark,
637                                    __u32 flags)
638 {
639         struct ip_vs_dest *dest;
640         struct ip_vs_service *svc;
641         __be16 port = dport;
642
643         svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
644         if (!svc)
645                 return NULL;
646         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
647                 port = 0;
648         dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
649         if (!dest)
650                 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
651         return dest;
652 }
653
654 void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
655 {
656         struct ip_vs_dest_dst *dest_dst = container_of(head,
657                                                        struct ip_vs_dest_dst,
658                                                        rcu_head);
659
660         dst_release(dest_dst->dst_cache);
661         kfree(dest_dst);
662 }
663
664 /* Release dest_dst and dst_cache for dest in user context */
665 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
666 {
667         struct ip_vs_dest_dst *old;
668
669         old = rcu_dereference_protected(dest->dest_dst, 1);
670         if (old) {
671                 RCU_INIT_POINTER(dest->dest_dst, NULL);
672                 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
673         }
674 }
675
676 /*
677  *  Lookup dest by {svc,addr,port} in the destination trash.
678  *  The destination trash is used to hold the destinations that are removed
679  *  from the service table but are still referenced by some conn entries.
680  *  The reason to add the destination trash is when the dest is temporary
681  *  down (either by administrator or by monitor program), the dest can be
682  *  picked back from the trash, the remaining connections to the dest can
683  *  continue, and the counting information of the dest is also useful for
684  *  scheduling.
685  */
686 static struct ip_vs_dest *
687 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
688                      const union nf_inet_addr *daddr, __be16 dport)
689 {
690         struct ip_vs_dest *dest;
691         struct netns_ipvs *ipvs = svc->ipvs;
692
693         /*
694          * Find the destination in trash
695          */
696         spin_lock_bh(&ipvs->dest_trash_lock);
697         list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
698                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
699                               "dest->refcnt=%d\n",
700                               dest->vfwmark,
701                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
702                               ntohs(dest->port),
703                               atomic_read(&dest->refcnt));
704                 if (dest->af == dest_af &&
705                     ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
706                     dest->port == dport &&
707                     dest->vfwmark == svc->fwmark &&
708                     dest->protocol == svc->protocol &&
709                     (svc->fwmark ||
710                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
711                       dest->vport == svc->port))) {
712                         /* HIT */
713                         list_del(&dest->t_list);
714                         ip_vs_dest_hold(dest);
715                         goto out;
716                 }
717         }
718
719         dest = NULL;
720
721 out:
722         spin_unlock_bh(&ipvs->dest_trash_lock);
723
724         return dest;
725 }
726
727 static void ip_vs_dest_free(struct ip_vs_dest *dest)
728 {
729         struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
730
731         __ip_vs_dst_cache_reset(dest);
732         __ip_vs_svc_put(svc, false);
733         free_percpu(dest->stats.cpustats);
734         ip_vs_dest_put_and_free(dest);
735 }
736
737 /*
738  *  Clean up all the destinations in the trash
739  *  Called by the ip_vs_control_cleanup()
740  *
741  *  When the ip_vs_control_clearup is activated by ipvs module exit,
742  *  the service tables must have been flushed and all the connections
743  *  are expired, and the refcnt of each destination in the trash must
744  *  be 0, so we simply release them here.
745  */
746 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
747 {
748         struct ip_vs_dest *dest, *nxt;
749
750         del_timer_sync(&ipvs->dest_trash_timer);
751         /* No need to use dest_trash_lock */
752         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
753                 list_del(&dest->t_list);
754                 ip_vs_dest_free(dest);
755         }
756 }
757
758 static void
759 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
760 {
761 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
762
763         spin_lock_bh(&src->lock);
764
765         IP_VS_SHOW_STATS_COUNTER(conns);
766         IP_VS_SHOW_STATS_COUNTER(inpkts);
767         IP_VS_SHOW_STATS_COUNTER(outpkts);
768         IP_VS_SHOW_STATS_COUNTER(inbytes);
769         IP_VS_SHOW_STATS_COUNTER(outbytes);
770
771         ip_vs_read_estimator(dst, src);
772
773         spin_unlock_bh(&src->lock);
774 }
775
776 static void
777 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
778 {
779         dst->conns = (u32)src->conns;
780         dst->inpkts = (u32)src->inpkts;
781         dst->outpkts = (u32)src->outpkts;
782         dst->inbytes = src->inbytes;
783         dst->outbytes = src->outbytes;
784         dst->cps = (u32)src->cps;
785         dst->inpps = (u32)src->inpps;
786         dst->outpps = (u32)src->outpps;
787         dst->inbps = (u32)src->inbps;
788         dst->outbps = (u32)src->outbps;
789 }
790
791 static void
792 ip_vs_zero_stats(struct ip_vs_stats *stats)
793 {
794         spin_lock_bh(&stats->lock);
795
796         /* get current counters as zero point, rates are zeroed */
797
798 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
799
800         IP_VS_ZERO_STATS_COUNTER(conns);
801         IP_VS_ZERO_STATS_COUNTER(inpkts);
802         IP_VS_ZERO_STATS_COUNTER(outpkts);
803         IP_VS_ZERO_STATS_COUNTER(inbytes);
804         IP_VS_ZERO_STATS_COUNTER(outbytes);
805
806         ip_vs_zero_estimator(stats);
807
808         spin_unlock_bh(&stats->lock);
809 }
810
811 /*
812  *      Update a destination in the given service
813  */
814 static void
815 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
816                     struct ip_vs_dest_user_kern *udest, int add)
817 {
818         struct netns_ipvs *ipvs = svc->ipvs;
819         struct ip_vs_service *old_svc;
820         struct ip_vs_scheduler *sched;
821         int conn_flags;
822
823         /* We cannot modify an address and change the address family */
824         BUG_ON(!add && udest->af != dest->af);
825
826         if (add && udest->af != svc->af)
827                 ipvs->mixed_address_family_dests++;
828
829         /* set the weight and the flags */
830         atomic_set(&dest->weight, udest->weight);
831         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
832         conn_flags |= IP_VS_CONN_F_INACTIVE;
833
834         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
835         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
836                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
837         } else {
838                 /*
839                  *    Put the real service in rs_table if not present.
840                  *    For now only for NAT!
841                  */
842                 ip_vs_rs_hash(ipvs, dest);
843         }
844         atomic_set(&dest->conn_flags, conn_flags);
845
846         /* bind the service */
847         old_svc = rcu_dereference_protected(dest->svc, 1);
848         if (!old_svc) {
849                 __ip_vs_bind_svc(dest, svc);
850         } else {
851                 if (old_svc != svc) {
852                         ip_vs_zero_stats(&dest->stats);
853                         __ip_vs_bind_svc(dest, svc);
854                         __ip_vs_svc_put(old_svc, true);
855                 }
856         }
857
858         /* set the dest status flags */
859         dest->flags |= IP_VS_DEST_F_AVAILABLE;
860
861         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
862                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
863         dest->u_threshold = udest->u_threshold;
864         dest->l_threshold = udest->l_threshold;
865
866         dest->af = udest->af;
867
868         spin_lock_bh(&dest->dst_lock);
869         __ip_vs_dst_cache_reset(dest);
870         spin_unlock_bh(&dest->dst_lock);
871
872         if (add) {
873                 ip_vs_start_estimator(svc->ipvs, &dest->stats);
874                 list_add_rcu(&dest->n_list, &svc->destinations);
875                 svc->num_dests++;
876                 sched = rcu_dereference_protected(svc->scheduler, 1);
877                 if (sched && sched->add_dest)
878                         sched->add_dest(svc, dest);
879         } else {
880                 sched = rcu_dereference_protected(svc->scheduler, 1);
881                 if (sched && sched->upd_dest)
882                         sched->upd_dest(svc, dest);
883         }
884 }
885
886
887 /*
888  *      Create a destination for the given service
889  */
890 static int
891 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
892                struct ip_vs_dest **dest_p)
893 {
894         struct ip_vs_dest *dest;
895         unsigned int atype, i;
896
897         EnterFunction(2);
898
899 #ifdef CONFIG_IP_VS_IPV6
900         if (udest->af == AF_INET6) {
901                 atype = ipv6_addr_type(&udest->addr.in6);
902                 if ((!(atype & IPV6_ADDR_UNICAST) ||
903                         atype & IPV6_ADDR_LINKLOCAL) &&
904                         !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
905                         return -EINVAL;
906         } else
907 #endif
908         {
909                 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
910                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
911                         return -EINVAL;
912         }
913
914         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
915         if (dest == NULL)
916                 return -ENOMEM;
917
918         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
919         if (!dest->stats.cpustats)
920                 goto err_alloc;
921
922         for_each_possible_cpu(i) {
923                 struct ip_vs_cpu_stats *ip_vs_dest_stats;
924                 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
925                 u64_stats_init(&ip_vs_dest_stats->syncp);
926         }
927
928         dest->af = udest->af;
929         dest->protocol = svc->protocol;
930         dest->vaddr = svc->addr;
931         dest->vport = svc->port;
932         dest->vfwmark = svc->fwmark;
933         ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
934         dest->port = udest->port;
935
936         atomic_set(&dest->activeconns, 0);
937         atomic_set(&dest->inactconns, 0);
938         atomic_set(&dest->persistconns, 0);
939         atomic_set(&dest->refcnt, 1);
940
941         INIT_HLIST_NODE(&dest->d_list);
942         spin_lock_init(&dest->dst_lock);
943         spin_lock_init(&dest->stats.lock);
944         __ip_vs_update_dest(svc, dest, udest, 1);
945
946         *dest_p = dest;
947
948         LeaveFunction(2);
949         return 0;
950
951 err_alloc:
952         kfree(dest);
953         return -ENOMEM;
954 }
955
956
957 /*
958  *      Add a destination into an existing service
959  */
960 static int
961 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
962 {
963         struct ip_vs_dest *dest;
964         union nf_inet_addr daddr;
965         __be16 dport = udest->port;
966         int ret;
967
968         EnterFunction(2);
969
970         if (udest->weight < 0) {
971                 pr_err("%s(): server weight less than zero\n", __func__);
972                 return -ERANGE;
973         }
974
975         if (udest->l_threshold > udest->u_threshold) {
976                 pr_err("%s(): lower threshold is higher than upper threshold\n",
977                         __func__);
978                 return -ERANGE;
979         }
980
981         ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
982
983         /* We use function that requires RCU lock */
984         rcu_read_lock();
985         dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
986         rcu_read_unlock();
987
988         if (dest != NULL) {
989                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
990                 return -EEXIST;
991         }
992
993         /*
994          * Check if the dest already exists in the trash and
995          * is from the same service
996          */
997         dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
998
999         if (dest != NULL) {
1000                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
1001                               "dest->refcnt=%d, service %u/%s:%u\n",
1002                               IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
1003                               atomic_read(&dest->refcnt),
1004                               dest->vfwmark,
1005                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
1006                               ntohs(dest->vport));
1007
1008                 __ip_vs_update_dest(svc, dest, udest, 1);
1009                 ret = 0;
1010         } else {
1011                 /*
1012                  * Allocate and initialize the dest structure
1013                  */
1014                 ret = ip_vs_new_dest(svc, udest, &dest);
1015         }
1016         LeaveFunction(2);
1017
1018         return ret;
1019 }
1020
1021
1022 /*
1023  *      Edit a destination in the given service
1024  */
1025 static int
1026 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1027 {
1028         struct ip_vs_dest *dest;
1029         union nf_inet_addr daddr;
1030         __be16 dport = udest->port;
1031
1032         EnterFunction(2);
1033
1034         if (udest->weight < 0) {
1035                 pr_err("%s(): server weight less than zero\n", __func__);
1036                 return -ERANGE;
1037         }
1038
1039         if (udest->l_threshold > udest->u_threshold) {
1040                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1041                         __func__);
1042                 return -ERANGE;
1043         }
1044
1045         ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1046
1047         /* We use function that requires RCU lock */
1048         rcu_read_lock();
1049         dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1050         rcu_read_unlock();
1051
1052         if (dest == NULL) {
1053                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1054                 return -ENOENT;
1055         }
1056
1057         __ip_vs_update_dest(svc, dest, udest, 0);
1058         LeaveFunction(2);
1059
1060         return 0;
1061 }
1062
1063 /*
1064  *      Delete a destination (must be already unlinked from the service)
1065  */
1066 static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
1067                              bool cleanup)
1068 {
1069         ip_vs_stop_estimator(ipvs, &dest->stats);
1070
1071         /*
1072          *  Remove it from the d-linked list with the real services.
1073          */
1074         ip_vs_rs_unhash(dest);
1075
1076         spin_lock_bh(&ipvs->dest_trash_lock);
1077         IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1078                       IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1079                       atomic_read(&dest->refcnt));
1080         if (list_empty(&ipvs->dest_trash) && !cleanup)
1081                 mod_timer(&ipvs->dest_trash_timer,
1082                           jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1083         /* dest lives in trash without reference */
1084         list_add(&dest->t_list, &ipvs->dest_trash);
1085         dest->idle_start = 0;
1086         spin_unlock_bh(&ipvs->dest_trash_lock);
1087         ip_vs_dest_put(dest);
1088 }
1089
1090
1091 /*
1092  *      Unlink a destination from the given service
1093  */
1094 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1095                                 struct ip_vs_dest *dest,
1096                                 int svcupd)
1097 {
1098         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1099
1100         /*
1101          *  Remove it from the d-linked destination list.
1102          */
1103         list_del_rcu(&dest->n_list);
1104         svc->num_dests--;
1105
1106         if (dest->af != svc->af)
1107                 svc->ipvs->mixed_address_family_dests--;
1108
1109         if (svcupd) {
1110                 struct ip_vs_scheduler *sched;
1111
1112                 sched = rcu_dereference_protected(svc->scheduler, 1);
1113                 if (sched && sched->del_dest)
1114                         sched->del_dest(svc, dest);
1115         }
1116 }
1117
1118
1119 /*
1120  *      Delete a destination server in the given service
1121  */
1122 static int
1123 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1124 {
1125         struct ip_vs_dest *dest;
1126         __be16 dport = udest->port;
1127
1128         EnterFunction(2);
1129
1130         /* We use function that requires RCU lock */
1131         rcu_read_lock();
1132         dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
1133         rcu_read_unlock();
1134
1135         if (dest == NULL) {
1136                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1137                 return -ENOENT;
1138         }
1139
1140         /*
1141          *      Unlink dest from the service
1142          */
1143         __ip_vs_unlink_dest(svc, dest, 1);
1144
1145         /*
1146          *      Delete the destination
1147          */
1148         __ip_vs_del_dest(svc->ipvs, dest, false);
1149
1150         LeaveFunction(2);
1151
1152         return 0;
1153 }
1154
1155 static void ip_vs_dest_trash_expire(unsigned long data)
1156 {
1157         struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
1158         struct ip_vs_dest *dest, *next;
1159         unsigned long now = jiffies;
1160
1161         spin_lock(&ipvs->dest_trash_lock);
1162         list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1163                 if (atomic_read(&dest->refcnt) > 0)
1164                         continue;
1165                 if (dest->idle_start) {
1166                         if (time_before(now, dest->idle_start +
1167                                              IP_VS_DEST_TRASH_PERIOD))
1168                                 continue;
1169                 } else {
1170                         dest->idle_start = max(1UL, now);
1171                         continue;
1172                 }
1173                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1174                               dest->vfwmark,
1175                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1176                               ntohs(dest->port));
1177                 list_del(&dest->t_list);
1178                 ip_vs_dest_free(dest);
1179         }
1180         if (!list_empty(&ipvs->dest_trash))
1181                 mod_timer(&ipvs->dest_trash_timer,
1182                           jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1183         spin_unlock(&ipvs->dest_trash_lock);
1184 }
1185
1186 /*
1187  *      Add a service into the service hash table
1188  */
1189 static int
1190 ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
1191                   struct ip_vs_service **svc_p)
1192 {
1193         int ret = 0, i;
1194         struct ip_vs_scheduler *sched = NULL;
1195         struct ip_vs_pe *pe = NULL;
1196         struct ip_vs_service *svc = NULL;
1197
1198         /* increase the module use count */
1199         ip_vs_use_count_inc();
1200
1201         /* Lookup the scheduler by 'u->sched_name' */
1202         if (strcmp(u->sched_name, "none")) {
1203                 sched = ip_vs_scheduler_get(u->sched_name);
1204                 if (!sched) {
1205                         pr_info("Scheduler module ip_vs_%s not found\n",
1206                                 u->sched_name);
1207                         ret = -ENOENT;
1208                         goto out_err;
1209                 }
1210         }
1211
1212         if (u->pe_name && *u->pe_name) {
1213                 pe = ip_vs_pe_getbyname(u->pe_name);
1214                 if (pe == NULL) {
1215                         pr_info("persistence engine module ip_vs_pe_%s "
1216                                 "not found\n", u->pe_name);
1217                         ret = -ENOENT;
1218                         goto out_err;
1219                 }
1220         }
1221
1222 #ifdef CONFIG_IP_VS_IPV6
1223         if (u->af == AF_INET6) {
1224                 __u32 plen = (__force __u32) u->netmask;
1225
1226                 if (plen < 1 || plen > 128) {
1227                         ret = -EINVAL;
1228                         goto out_err;
1229                 }
1230         }
1231 #endif
1232
1233         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1234         if (svc == NULL) {
1235                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1236                 ret = -ENOMEM;
1237                 goto out_err;
1238         }
1239         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1240         if (!svc->stats.cpustats) {
1241                 ret = -ENOMEM;
1242                 goto out_err;
1243         }
1244
1245         for_each_possible_cpu(i) {
1246                 struct ip_vs_cpu_stats *ip_vs_stats;
1247                 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i);
1248                 u64_stats_init(&ip_vs_stats->syncp);
1249         }
1250
1251
1252         /* I'm the first user of the service */
1253         atomic_set(&svc->refcnt, 0);
1254
1255         svc->af = u->af;
1256         svc->protocol = u->protocol;
1257         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1258         svc->port = u->port;
1259         svc->fwmark = u->fwmark;
1260         svc->flags = u->flags;
1261         svc->timeout = u->timeout * HZ;
1262         svc->netmask = u->netmask;
1263         svc->ipvs = ipvs;
1264
1265         INIT_LIST_HEAD(&svc->destinations);
1266         spin_lock_init(&svc->sched_lock);
1267         spin_lock_init(&svc->stats.lock);
1268
1269         /* Bind the scheduler */
1270         if (sched) {
1271                 ret = ip_vs_bind_scheduler(svc, sched);
1272                 if (ret)
1273                         goto out_err;
1274                 sched = NULL;
1275         }
1276
1277         /* Bind the ct retriever */
1278         RCU_INIT_POINTER(svc->pe, pe);
1279         pe = NULL;
1280
1281         /* Update the virtual service counters */
1282         if (svc->port == FTPPORT)
1283                 atomic_inc(&ipvs->ftpsvc_counter);
1284         else if (svc->port == 0)
1285                 atomic_inc(&ipvs->nullsvc_counter);
1286         if (svc->pe && svc->pe->conn_out)
1287                 atomic_inc(&ipvs->conn_out_counter);
1288
1289         ip_vs_start_estimator(ipvs, &svc->stats);
1290
1291         /* Count only IPv4 services for old get/setsockopt interface */
1292         if (svc->af == AF_INET)
1293                 ipvs->num_services++;
1294
1295         /* Hash the service into the service table */
1296         ip_vs_svc_hash(svc);
1297
1298         *svc_p = svc;
1299         /* Now there is a service - full throttle */
1300         ipvs->enable = 1;
1301         return 0;
1302
1303
1304  out_err:
1305         if (svc != NULL) {
1306                 ip_vs_unbind_scheduler(svc, sched);
1307                 ip_vs_service_free(svc);
1308         }
1309         ip_vs_scheduler_put(sched);
1310         ip_vs_pe_put(pe);
1311
1312         /* decrease the module use count */
1313         ip_vs_use_count_dec();
1314
1315         return ret;
1316 }
1317
1318
1319 /*
1320  *      Edit a service and bind it with a new scheduler
1321  */
1322 static int
1323 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1324 {
1325         struct ip_vs_scheduler *sched = NULL, *old_sched;
1326         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1327         int ret = 0;
1328         bool new_pe_conn_out, old_pe_conn_out;
1329
1330         /*
1331          * Lookup the scheduler, by 'u->sched_name'
1332          */
1333         if (strcmp(u->sched_name, "none")) {
1334                 sched = ip_vs_scheduler_get(u->sched_name);
1335                 if (!sched) {
1336                         pr_info("Scheduler module ip_vs_%s not found\n",
1337                                 u->sched_name);
1338                         return -ENOENT;
1339                 }
1340         }
1341         old_sched = sched;
1342
1343         if (u->pe_name && *u->pe_name) {
1344                 pe = ip_vs_pe_getbyname(u->pe_name);
1345                 if (pe == NULL) {
1346                         pr_info("persistence engine module ip_vs_pe_%s "
1347                                 "not found\n", u->pe_name);
1348                         ret = -ENOENT;
1349                         goto out;
1350                 }
1351                 old_pe = pe;
1352         }
1353
1354 #ifdef CONFIG_IP_VS_IPV6
1355         if (u->af == AF_INET6) {
1356                 __u32 plen = (__force __u32) u->netmask;
1357
1358                 if (plen < 1 || plen > 128) {
1359                         ret = -EINVAL;
1360                         goto out;
1361                 }
1362         }
1363 #endif
1364
1365         old_sched = rcu_dereference_protected(svc->scheduler, 1);
1366         if (sched != old_sched) {
1367                 if (old_sched) {
1368                         ip_vs_unbind_scheduler(svc, old_sched);
1369                         RCU_INIT_POINTER(svc->scheduler, NULL);
1370                         /* Wait all svc->sched_data users */
1371                         synchronize_rcu();
1372                 }
1373                 /* Bind the new scheduler */
1374                 if (sched) {
1375                         ret = ip_vs_bind_scheduler(svc, sched);
1376                         if (ret) {
1377                                 ip_vs_scheduler_put(sched);
1378                                 goto out;
1379                         }
1380                 }
1381         }
1382
1383         /*
1384          * Set the flags and timeout value
1385          */
1386         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1387         svc->timeout = u->timeout * HZ;
1388         svc->netmask = u->netmask;
1389
1390         old_pe = rcu_dereference_protected(svc->pe, 1);
1391         if (pe != old_pe) {
1392                 rcu_assign_pointer(svc->pe, pe);
1393                 /* check for optional methods in new pe */
1394                 new_pe_conn_out = (pe && pe->conn_out) ? true : false;
1395                 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false;
1396                 if (new_pe_conn_out && !old_pe_conn_out)
1397                         atomic_inc(&svc->ipvs->conn_out_counter);
1398                 if (old_pe_conn_out && !new_pe_conn_out)
1399                         atomic_dec(&svc->ipvs->conn_out_counter);
1400         }
1401
1402 out:
1403         ip_vs_scheduler_put(old_sched);
1404         ip_vs_pe_put(old_pe);
1405         return ret;
1406 }
1407
1408 /*
1409  *      Delete a service from the service list
1410  *      - The service must be unlinked, unlocked and not referenced!
1411  *      - We are called under _bh lock
1412  */
1413 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1414 {
1415         struct ip_vs_dest *dest, *nxt;
1416         struct ip_vs_scheduler *old_sched;
1417         struct ip_vs_pe *old_pe;
1418         struct netns_ipvs *ipvs = svc->ipvs;
1419
1420         /* Count only IPv4 services for old get/setsockopt interface */
1421         if (svc->af == AF_INET)
1422                 ipvs->num_services--;
1423
1424         ip_vs_stop_estimator(svc->ipvs, &svc->stats);
1425
1426         /* Unbind scheduler */
1427         old_sched = rcu_dereference_protected(svc->scheduler, 1);
1428         ip_vs_unbind_scheduler(svc, old_sched);
1429         ip_vs_scheduler_put(old_sched);
1430
1431         /* Unbind persistence engine, keep svc->pe */
1432         old_pe = rcu_dereference_protected(svc->pe, 1);
1433         if (old_pe && old_pe->conn_out)
1434                 atomic_dec(&ipvs->conn_out_counter);
1435         ip_vs_pe_put(old_pe);
1436
1437         /*
1438          *    Unlink the whole destination list
1439          */
1440         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1441                 __ip_vs_unlink_dest(svc, dest, 0);
1442                 __ip_vs_del_dest(svc->ipvs, dest, cleanup);
1443         }
1444
1445         /*
1446          *    Update the virtual service counters
1447          */
1448         if (svc->port == FTPPORT)
1449                 atomic_dec(&ipvs->ftpsvc_counter);
1450         else if (svc->port == 0)
1451                 atomic_dec(&ipvs->nullsvc_counter);
1452
1453         /*
1454          *    Free the service if nobody refers to it
1455          */
1456         __ip_vs_svc_put(svc, true);
1457
1458         /* decrease the module use count */
1459         ip_vs_use_count_dec();
1460 }
1461
1462 /*
1463  * Unlink a service from list and try to delete it if its refcnt reached 0
1464  */
1465 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1466 {
1467         /* Hold svc to avoid double release from dest_trash */
1468         atomic_inc(&svc->refcnt);
1469         /*
1470          * Unhash it from the service table
1471          */
1472         ip_vs_svc_unhash(svc);
1473
1474         __ip_vs_del_service(svc, cleanup);
1475 }
1476
1477 /*
1478  *      Delete a service from the service list
1479  */
1480 static int ip_vs_del_service(struct ip_vs_service *svc)
1481 {
1482         if (svc == NULL)
1483                 return -EEXIST;
1484         ip_vs_unlink_service(svc, false);
1485
1486         return 0;
1487 }
1488
1489
1490 /*
1491  *      Flush all the virtual services
1492  */
1493 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
1494 {
1495         int idx;
1496         struct ip_vs_service *svc;
1497         struct hlist_node *n;
1498
1499         /*
1500          * Flush the service table hashed by <netns,protocol,addr,port>
1501          */
1502         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1503                 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1504                                           s_list) {
1505                         if (svc->ipvs == ipvs)
1506                                 ip_vs_unlink_service(svc, cleanup);
1507                 }
1508         }
1509
1510         /*
1511          * Flush the service table hashed by fwmark
1512          */
1513         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1514                 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1515                                           f_list) {
1516                         if (svc->ipvs == ipvs)
1517                                 ip_vs_unlink_service(svc, cleanup);
1518                 }
1519         }
1520
1521         return 0;
1522 }
1523
1524 /*
1525  *      Delete service by {netns} in the service table.
1526  *      Called by __ip_vs_cleanup()
1527  */
1528 void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
1529 {
1530         EnterFunction(2);
1531         /* Check for "full" addressed entries */
1532         mutex_lock(&__ip_vs_mutex);
1533         ip_vs_flush(ipvs, true);
1534         mutex_unlock(&__ip_vs_mutex);
1535         LeaveFunction(2);
1536 }
1537
1538 /* Put all references for device (dst_cache) */
1539 static inline void
1540 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1541 {
1542         struct ip_vs_dest_dst *dest_dst;
1543
1544         spin_lock_bh(&dest->dst_lock);
1545         dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1546         if (dest_dst && dest_dst->dst_cache->dev == dev) {
1547                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1548                               dev->name,
1549                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1550                               ntohs(dest->port),
1551                               atomic_read(&dest->refcnt));
1552                 __ip_vs_dst_cache_reset(dest);
1553         }
1554         spin_unlock_bh(&dest->dst_lock);
1555
1556 }
1557 /* Netdev event receiver
1558  * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1559  */
1560 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1561                            void *ptr)
1562 {
1563         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1564         struct net *net = dev_net(dev);
1565         struct netns_ipvs *ipvs = net_ipvs(net);
1566         struct ip_vs_service *svc;
1567         struct ip_vs_dest *dest;
1568         unsigned int idx;
1569
1570         if (event != NETDEV_DOWN || !ipvs)
1571                 return NOTIFY_DONE;
1572         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1573         EnterFunction(2);
1574         mutex_lock(&__ip_vs_mutex);
1575         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1576                 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1577                         if (svc->ipvs == ipvs) {
1578                                 list_for_each_entry(dest, &svc->destinations,
1579                                                     n_list) {
1580                                         ip_vs_forget_dev(dest, dev);
1581                                 }
1582                         }
1583                 }
1584
1585                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1586                         if (svc->ipvs == ipvs) {
1587                                 list_for_each_entry(dest, &svc->destinations,
1588                                                     n_list) {
1589                                         ip_vs_forget_dev(dest, dev);
1590                                 }
1591                         }
1592
1593                 }
1594         }
1595
1596         spin_lock_bh(&ipvs->dest_trash_lock);
1597         list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1598                 ip_vs_forget_dev(dest, dev);
1599         }
1600         spin_unlock_bh(&ipvs->dest_trash_lock);
1601         mutex_unlock(&__ip_vs_mutex);
1602         LeaveFunction(2);
1603         return NOTIFY_DONE;
1604 }
1605
1606 /*
1607  *      Zero counters in a service or all services
1608  */
1609 static int ip_vs_zero_service(struct ip_vs_service *svc)
1610 {
1611         struct ip_vs_dest *dest;
1612
1613         list_for_each_entry(dest, &svc->destinations, n_list) {
1614                 ip_vs_zero_stats(&dest->stats);
1615         }
1616         ip_vs_zero_stats(&svc->stats);
1617         return 0;
1618 }
1619
1620 static int ip_vs_zero_all(struct netns_ipvs *ipvs)
1621 {
1622         int idx;
1623         struct ip_vs_service *svc;
1624
1625         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1626                 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1627                         if (svc->ipvs == ipvs)
1628                                 ip_vs_zero_service(svc);
1629                 }
1630         }
1631
1632         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1633                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1634                         if (svc->ipvs == ipvs)
1635                                 ip_vs_zero_service(svc);
1636                 }
1637         }
1638
1639         ip_vs_zero_stats(&ipvs->tot_stats);
1640         return 0;
1641 }
1642
1643 #ifdef CONFIG_SYSCTL
1644
/* File-local integer constants 0 and 3.  Their users are not in this part
 * of the file; presumably they serve as sysctl range bounds
 * (.extra1/.extra2) - TODO confirm against the table entries.
 */
static int zero;
static int three = 3;
1647
1648 static int
1649 proc_do_defense_mode(struct ctl_table *table, int write,
1650                      void __user *buffer, size_t *lenp, loff_t *ppos)
1651 {
1652         struct netns_ipvs *ipvs = table->extra2;
1653         int *valp = table->data;
1654         int val = *valp;
1655         int rc;
1656
1657         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1658         if (write && (*valp != val)) {
1659                 if ((*valp < 0) || (*valp > 3)) {
1660                         /* Restore the correct value */
1661                         *valp = val;
1662                 } else {
1663                         update_defense_level(ipvs);
1664                 }
1665         }
1666         return rc;
1667 }
1668
1669 static int
1670 proc_do_sync_threshold(struct ctl_table *table, int write,
1671                        void __user *buffer, size_t *lenp, loff_t *ppos)
1672 {
1673         int *valp = table->data;
1674         int val[2];
1675         int rc;
1676
1677         /* backup the value first */
1678         memcpy(val, valp, sizeof(val));
1679
1680         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1681         if (write && (valp[0] < 0 || valp[1] < 0 ||
1682             (valp[0] >= valp[1] && valp[1]))) {
1683                 /* Restore the correct value */
1684                 memcpy(valp, val, sizeof(val));
1685         }
1686         return rc;
1687 }
1688
1689 static int
1690 proc_do_sync_mode(struct ctl_table *table, int write,
1691                      void __user *buffer, size_t *lenp, loff_t *ppos)
1692 {
1693         int *valp = table->data;
1694         int val = *valp;
1695         int rc;
1696
1697         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1698         if (write && (*valp != val)) {
1699                 if ((*valp < 0) || (*valp > 1)) {
1700                         /* Restore the correct value */
1701                         *valp = val;
1702                 }
1703         }
1704         return rc;
1705 }
1706
1707 static int
1708 proc_do_sync_ports(struct ctl_table *table, int write,
1709                    void __user *buffer, size_t *lenp, loff_t *ppos)
1710 {
1711         int *valp = table->data;
1712         int val = *valp;
1713         int rc;
1714
1715         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1716         if (write && (*valp != val)) {
1717                 if (*valp < 1 || !is_power_of_2(*valp)) {
1718                         /* Restore the correct value */
1719                         *valp = val;
1720                 }
1721         }
1722         return rc;
1723 }
1724
/*
 *      IPVS sysctl table (under /proc/sys/net/ipv4/vs/)
 *      Do not change the order or insert new entries without
 *      aligning with the netns init in ip_vs_control_net_init()
 */
1730
1731 static struct ctl_table vs_vars[] = {
1732         {
1733                 .procname       = "amemthresh",
1734                 .maxlen         = sizeof(int),
1735                 .mode           = 0644,
1736                 .proc_handler   = proc_dointvec,
1737         },
1738         {
1739                 .procname       = "am_droprate",
1740                 .maxlen         = sizeof(int),
1741                 .mode           = 0644,
1742                 .proc_handler   = proc_dointvec,
1743         },
1744         {
1745                 .procname       = "drop_entry",
1746                 .maxlen         = sizeof(int),
1747                 .mode           = 0644,
1748                 .proc_handler   = proc_do_defense_mode,
1749         },
1750         {
1751                 .procname       = "drop_packet",
1752                 .maxlen         = sizeof(int),
1753                 .mode           = 0644,
1754                 .proc_handler   = proc_do_defense_mode,
1755         },
1756 #ifdef CONFIG_IP_VS_NFCT
1757         {
1758                 .procname       = "conntrack",
1759                 .maxlen         = sizeof(int),
1760                 .mode           = 0644,
1761                 .proc_handler   = &proc_dointvec,
1762         },
1763 #endif
1764         {
1765                 .procname       = "secure_tcp",
1766                 .maxlen         = sizeof(int),
1767                 .mode           = 0644,
1768                 .proc_handler   = proc_do_defense_mode,
1769         },
1770         {
1771                 .procname       = "snat_reroute",
1772                 .maxlen         = sizeof(int),
1773                 .mode           = 0644,
1774                 .proc_handler   = &proc_dointvec,
1775         },
1776         {
1777                 .procname       = "sync_version",
1778                 .maxlen         = sizeof(int),
1779                 .mode           = 0644,
1780                 .proc_handler   = &proc_do_sync_mode,
1781         },
1782         {
1783                 .procname       = "sync_ports",
1784                 .maxlen         = sizeof(int),
1785                 .mode           = 0644,
1786                 .proc_handler   = &proc_do_sync_ports,
1787         },
1788         {
1789                 .procname       = "sync_persist_mode",
1790                 .maxlen         = sizeof(int),
1791                 .mode           = 0644,
1792                 .proc_handler   = proc_dointvec,
1793         },
1794         {
1795                 .procname       = "sync_qlen_max",
1796                 .maxlen         = sizeof(unsigned long),
1797                 .mode           = 0644,
1798                 .proc_handler   = proc_doulongvec_minmax,
1799         },
1800         {
1801                 .procname       = "sync_sock_size",
1802                 .maxlen         = sizeof(int),
1803                 .mode           = 0644,
1804                 .proc_handler   = proc_dointvec,
1805         },
1806         {
1807                 .procname       = "cache_bypass",
1808                 .maxlen         = sizeof(int),
1809                 .mode           = 0644,
1810                 .proc_handler   = proc_dointvec,
1811         },
1812         {
1813                 .procname       = "expire_nodest_conn",
1814                 .maxlen         = sizeof(int),
1815                 .mode           = 0644,
1816                 .proc_handler   = proc_dointvec,
1817         },
1818         {
1819                 .procname       = "sloppy_tcp",
1820                 .maxlen         = sizeof(int),
1821                 .mode           = 0644,
1822                 .proc_handler   = proc_dointvec,
1823         },
1824         {
1825                 .procname       = "sloppy_sctp",
1826                 .maxlen         = sizeof(int),
1827                 .mode           = 0644,
1828                 .proc_handler   = proc_dointvec,
1829         },
1830         {
1831                 .procname       = "expire_quiescent_template",
1832                 .maxlen         = sizeof(int),
1833                 .mode           = 0644,
1834                 .proc_handler   = proc_dointvec,
1835         },
1836         {
1837                 .procname       = "sync_threshold",
1838                 .maxlen         =
1839                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1840                 .mode           = 0644,
1841                 .proc_handler   = proc_do_sync_threshold,
1842         },
1843         {
1844                 .procname       = "sync_refresh_period",
1845                 .maxlen         = sizeof(int),
1846                 .mode           = 0644,
1847                 .proc_handler   = proc_dointvec_jiffies,
1848         },
1849         {
1850                 .procname       = "sync_retries",
1851                 .maxlen         = sizeof(int),
1852                 .mode           = 0644,
1853                 .proc_handler   = proc_dointvec_minmax,
1854                 .extra1         = &zero,
1855                 .extra2         = &three,
1856         },
1857         {
1858                 .procname       = "nat_icmp_send",
1859                 .maxlen         = sizeof(int),
1860                 .mode           = 0644,
1861                 .proc_handler   = proc_dointvec,
1862         },
1863         {
1864                 .procname       = "pmtu_disc",
1865                 .maxlen         = sizeof(int),
1866                 .mode           = 0644,
1867                 .proc_handler   = proc_dointvec,
1868         },
1869         {
1870                 .procname       = "backup_only",
1871                 .maxlen         = sizeof(int),
1872                 .mode           = 0644,
1873                 .proc_handler   = proc_dointvec,
1874         },
1875         {
1876                 .procname       = "conn_reuse_mode",
1877                 .maxlen         = sizeof(int),
1878                 .mode           = 0644,
1879                 .proc_handler   = proc_dointvec,
1880         },
1881         {
1882                 .procname       = "schedule_icmp",
1883                 .maxlen         = sizeof(int),
1884                 .mode           = 0644,
1885                 .proc_handler   = proc_dointvec,
1886         },
1887         {
1888                 .procname       = "ignore_tunneled",
1889                 .maxlen         = sizeof(int),
1890                 .mode           = 0644,
1891                 .proc_handler   = proc_dointvec,
1892         },
1893 #ifdef CONFIG_IP_VS_DEBUG
1894         {
1895                 .procname       = "debug_level",
1896                 .data           = &sysctl_ip_vs_debug_level,
1897                 .maxlen         = sizeof(int),
1898                 .mode           = 0644,
1899                 .proc_handler   = proc_dointvec,
1900         },
1901 #endif
1902         { }
1903 };
1904
1905 #endif
1906
1907 #ifdef CONFIG_PROC_FS
1908
/*
 * Iterator state for the service listing.  Its size is what
 * ip_vs_info_open() hands to seq_open_net(), and the seq callbacks reach
 * it through seq->private.
 */
1909 struct ip_vs_iter {
1910         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1911         struct hlist_head *table;  /* table the last returned service was found in */
1912         int bucket;                /* bucket index inside that table */
1913 };
1914
1915 /*
1916  *      Label of the forwarding method encoded in @flags, as written to
1917  *      the PROCfs rule table (kept just for backward compatibility).
1918  */
1919 static inline const char *ip_vs_fwd_name(unsigned int flags)
1920 {
1921         unsigned int fwd_method = flags & IP_VS_CONN_F_FWD_MASK;
1922
1923         if (fwd_method == IP_VS_CONN_F_LOCALNODE)
1924                 return "Local";
1925         if (fwd_method == IP_VS_CONN_F_TUNNEL)
1926                 return "Tunnel";
1927         if (fwd_method == IP_VS_CONN_F_DROUTE)
1928                 return "Route";
1929
1930         return "Masq";  /* every remaining forwarding value */
1931 }
1932
1933
1934 /* Get the Nth entry in the two lists.
      *
      * Counts only services whose ->ipvs equals the netns_ipvs of @seq's
      * network namespace, walking ip_vs_svc_table first and
      * ip_vs_svc_fwm_table second, and returns the service at zero-based
      * position @pos.  On a hit, the table and bucket are recorded in the
      * iterator (seq->private), where ip_vs_info_seq_next() reads them.
      * Returns NULL when there are not more than @pos matching services.
      *
      * The chains are walked with the _rcu iterator; the visible caller,
      * ip_vs_info_seq_start(), takes rcu_read_lock() before calling.
      */
1935 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1936 {
1937         struct net *net = seq_file_net(seq);
1938         struct netns_ipvs *ipvs = net_ipvs(net);
1939         struct ip_vs_iter *iter = seq->private;
1940         int idx;
1941         struct ip_vs_service *svc;
1942
1943         /* look in hash by protocol */
1944         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1945                 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
                              /* pos is only consumed for services of this netns */
1946                         if ((svc->ipvs == ipvs) && pos-- == 0) {
1947                                 iter->table = ip_vs_svc_table;
1948                                 iter->bucket = idx;
1949                                 return svc;
1950                         }
1951                 }
1952         }
1953
1954         /* keep looking in fwmark */
1955         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1956                 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
1957                                          f_list) {
1958                         if ((svc->ipvs == ipvs) && pos-- == 0) {
1959                                 iter->table = ip_vs_svc_fwm_table;
1960                                 iter->bucket = idx;
1961                                 return svc;
1962                         }
1963                 }
1964         }
1965
1966         return NULL;
1967 }
1968
/*
 * seq_file ->start callback: takes rcu_read_lock() (released again in
 * ip_vs_info_seq_stop()) and positions the iterator.  *pos == 0 yields
 * SEQ_START_TOKEN, for which ->show prints the table header; any other
 * value selects the service with zero-based index *pos - 1.
 */
1969 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1970         __acquires(RCU)
1971 {
1972         rcu_read_lock();
1973         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1974 }
1975
1976
/*
 * seq_file ->next callback: advance *pos and return the service that
 * follows @v, or NULL at the end of both tables.
 *
 * After the header token the walk starts at index 0 via
 * ip_vs_info_array().  Otherwise the cursor kept in seq->private says
 * which table @v came from: the rest of the current chain is tried first,
 * then the following buckets, and once ip_vs_svc_table is exhausted the
 * scan continues in ip_vs_svc_fwm_table.
 *
 * NOTE(review): unlike ip_vs_info_array(), nothing here compares
 * svc->ipvs with the reader's netns, so entries of other namespaces can
 * be returned from this function.
 */
1977 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1978 {
1979         struct hlist_node *e;
1980         struct ip_vs_iter *iter;
1981         struct ip_vs_service *svc;
1982
1983         ++*pos;
1984         if (v == SEQ_START_TOKEN)
1985                 return ip_vs_info_array(seq,0);
1986
1987         svc = v;
1988         iter = seq->private;
1989
1990         if (iter->table == ip_vs_svc_table) {
1991                 /* next service in table hashed by protocol */
1992                 e = rcu_dereference(hlist_next_rcu(&svc->s_list));
1993                 if (e)
1994                         return hlist_entry(e, struct ip_vs_service, s_list);
1995
                      /* chain exhausted: first entry of the next non-empty bucket */
1996                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1997                         hlist_for_each_entry_rcu(svc,
1998                                                  &ip_vs_svc_table[iter->bucket],
1999                                                  s_list) {
2000                                 return svc;
2001                         }
2002                 }
2003
                      /* -1 so that the pre-increment below starts at bucket 0 */
2004                 iter->table = ip_vs_svc_fwm_table;
2005                 iter->bucket = -1;
2006                 goto scan_fwmark;
2007         }
2008
2009         /* next service in hashed by fwmark */
2010         e = rcu_dereference(hlist_next_rcu(&svc->f_list));
2011         if (e)
2012                 return hlist_entry(e, struct ip_vs_service, f_list);
2013
2014  scan_fwmark:
2015         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2016                 hlist_for_each_entry_rcu(svc,
2017                                          &ip_vs_svc_fwm_table[iter->bucket],
2018                                          f_list)
2019                         return svc;
2020         }
2021
2022         return NULL;
2023 }
2024
/*
 * seq_file ->stop callback: drops the RCU read lock taken in
 * ip_vs_info_seq_start().  @seq and @v are not used.
 */
2025 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2026         __releases(RCU)
2027 {
2028         rcu_read_unlock();
2029 }
2030
2031
2032 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2033 {
2034         if (v == SEQ_START_TOKEN) {
2035                 seq_printf(seq,
2036                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2037                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2038                 seq_puts(seq,
2039                          "Prot LocalAddress:Port Scheduler Flags\n");
2040                 seq_puts(seq,
2041                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2042         } else {
2043                 const struct ip_vs_service *svc = v;
2044                 const struct ip_vs_iter *iter = seq->private;
2045                 const struct ip_vs_dest *dest;
2046                 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2047                 char *sched_name = sched ? sched->name : "none";
2048
2049                 if (iter->table == ip_vs_svc_table) {
2050 #ifdef CONFIG_IP_VS_IPV6
2051                         if (svc->af == AF_INET6)
2052                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2053                                            ip_vs_proto_name(svc->protocol),
2054                                            &svc->addr.in6,
2055                                            ntohs(svc->port),
2056                                            sched_name);
2057                         else
2058 #endif
2059                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2060                                            ip_vs_proto_name(svc->protocol),
2061                                            ntohl(svc->addr.ip),
2062                                            ntohs(svc->port),
2063                                            sched_name,
2064                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2065                 } else {
2066                         seq_printf(seq, "FWM  %08X %s %s",
2067                                    svc->fwmark, sched_name,
2068                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2069                 }
2070
2071                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2072                         seq_printf(seq, "persistent %d %08X\n",
2073                                 svc->timeout,
2074                                 ntohl(svc->netmask));
2075                 else
2076                         seq_putc(seq, '\n');
2077
2078                 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2079 #ifdef CONFIG_IP_VS_IPV6
2080                         if (dest->af == AF_INET6)
2081                                 seq_printf(seq,
2082                                            "  -> [%pI6]:%04X"
2083                                            "      %-7s %-6d %-10d %-10d\n",
2084                                            &dest->addr.in6,
2085                                            ntohs(dest->port),
2086                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2087                                            atomic_read(&dest->weight),
2088                                            atomic_read(&dest->activeconns),
2089                                            atomic_read(&dest->inactconns));
2090                         else
2091 #endif
2092                                 seq_printf(seq,
2093                                            "  -> %08X:%04X      "
2094                                            "%-7s %-6d %-10d %-10d\n",
2095                                            ntohl(dest->addr.ip),
2096                                            ntohs(dest->port),
2097                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2098                                            atomic_read(&dest->weight),
2099                                            atomic_read(&dest->activeconns),
2100                                            atomic_read(&dest->inactconns));
2101
2102                 }
2103         }
2104         return 0;
2105 }
2106
/* seq_file callbacks of the service listing; installed by ip_vs_info_open(). */
2107 static const struct seq_operations ip_vs_info_seq_ops = {
2108         .start = ip_vs_info_seq_start,
2109         .next  = ip_vs_info_seq_next,
2110         .stop  = ip_vs_info_seq_stop,
2111         .show  = ip_vs_info_seq_show,
2112 };
2113
/*
 * open() handler of the service listing: sets up a netns-aware seq_file
 * with a struct ip_vs_iter sized private area.  Returns whatever
 * seq_open_net() returns.
 */
2114 static int ip_vs_info_open(struct inode *inode, struct file *file)
2115 {
2116         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2117                         sizeof(struct ip_vs_iter));
2118 }
2119
/*
 * File operations of the service listing; release uses seq_release_net()
 * to match the seq_open_net() done in ip_vs_info_open().
 */
2120 static const struct file_operations ip_vs_info_fops = {
2121         .owner   = THIS_MODULE,
2122         .open    = ip_vs_info_open,
2123         .read    = seq_read,
2124         .llseek  = seq_lseek,
2125         .release = seq_release_net,
2126 };
2127
/*
 * Single-shot show routine for the per-netns totals.
 *
 * Takes a snapshot of net_ipvs(net)->tot_stats with ip_vs_copy_stats()
 * and prints two blocks in hexadecimal: the cumulative counters (conns,
 * packets, bytes) and the rate fields (cps, pps, bps).  The numbered
 * comment rows below are column rulers for the header strings.
 * Always returns 0.
 */
2128 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2129 {
2130         struct net *net = seq_file_single_net(seq);
2131         struct ip_vs_kstats show;
2132
2133 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2134         seq_puts(seq,
2135                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2136         seq_puts(seq,
2137                  "   Conns  Packets  Packets            Bytes            Bytes\n");
2138
2139         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2140         seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
2141                    (unsigned long long)show.conns,
2142                    (unsigned long long)show.inpkts,
2143                    (unsigned long long)show.outpkts,
2144                    (unsigned long long)show.inbytes,
2145                    (unsigned long long)show.outbytes);
2146
2147 /*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
2148         seq_puts(seq,
2149                  " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2150         seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
2151                    (unsigned long long)show.cps,
2152                    (unsigned long long)show.inpps,
2153                    (unsigned long long)show.outpps,
2154                    (unsigned long long)show.inbps,
2155                    (unsigned long long)show.outbps);
2156
2157         return 0;
2158 }
2159
/* open() handler: binds ip_vs_stats_show() as a single-record, netns-aware seq_file. */
2160 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2161 {
2162         return single_open_net(inode, file, ip_vs_stats_show);
2163 }
2164
/*
 * File operations for the totals file; release uses single_release_net()
 * to match the single_open_net() done in ip_vs_stats_seq_open().
 */
2165 static const struct file_operations ip_vs_stats_fops = {
2166         .owner = THIS_MODULE,
2167         .open = ip_vs_stats_seq_open,
2168         .read = seq_read,
2169         .llseek = seq_lseek,
2170         .release = single_release_net,
2171 };
2172
/*
 * Single-shot show routine for the per-CPU breakdown of the totals.
 *
 * Prints one hexadecimal row per possible CPU with that CPU's counters,
 * read inside a u64_stats fetch/retry loop so a row is retried if the
 * counters were being updated.  A "~" row with the aggregated totals from
 * ip_vs_copy_stats() and a block with the rate fields follow.
 * Always returns 0.
 */
2173 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2174 {
2175         struct net *net = seq_file_single_net(seq);
2176         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2177         struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2178         struct ip_vs_kstats kstats;
2179         int i;
2180
2181 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2182         seq_puts(seq,
2183                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2184         seq_puts(seq,
2185                  "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2186
2187         for_each_possible_cpu(i) {
2188                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2189                 unsigned int start;
2190                 u64 conns, inpkts, outpkts, inbytes, outbytes;
2191
                     /* re-read all five counters if the retry check says so */
2192                 do {
2193                         start = u64_stats_fetch_begin_irq(&u->syncp);
2194                         conns = u->cnt.conns;
2195                         inpkts = u->cnt.inpkts;
2196                         outpkts = u->cnt.outpkts;
2197                         inbytes = u->cnt.inbytes;
2198                         outbytes = u->cnt.outbytes;
2199                 } while (u64_stats_fetch_retry_irq(&u->syncp, start));
2200
2201                 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
2202                            i, (unsigned long long)conns,
2203                            (unsigned long long)inpkts,
2204                            (unsigned long long)outpkts,
                                (unsigned long long)inbytes,
                                (unsigned long long)outbytes);
2205         }
2206
2207         ip_vs_copy_stats(&kstats, tot_stats);
2208
2209         seq_printf(seq, "  ~ %8LX %8LX %8LX %16LX %16LX\n\n",
2210                    (unsigned long long)kstats.conns,
2211                    (unsigned long long)kstats.inpkts,
2212                    (unsigned long long)kstats.outpkts,
2213                    (unsigned long long)kstats.inbytes,
2214                    (unsigned long long)kstats.outbytes);
2215
2216 /*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2217         seq_puts(seq,
2218                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
             /* %LX takes unsigned long long; cast as ip_vs_stats_show() does */
2219         seq_printf(seq, "    %8LX %8LX %8LX %16LX %16LX\n",
2220                    (unsigned long long)kstats.cps,
2221                    (unsigned long long)kstats.inpps,
2222                    (unsigned long long)kstats.outpps,
2223                    (unsigned long long)kstats.inbps,
2224                    (unsigned long long)kstats.outbps);
2225
2226         return 0;
2227 }
2228
/* open() handler: binds ip_vs_stats_percpu_show() as a single-record, netns-aware seq_file. */
2229 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2230 {
2231         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2232 }
2233
/*
 * File operations for the per-CPU statistics file; release uses
 * single_release_net() to match ip_vs_stats_percpu_seq_open().
 */
2234 static const struct file_operations ip_vs_stats_percpu_fops = {
2235         .owner = THIS_MODULE,
2236         .open = ip_vs_stats_percpu_seq_open,
2237         .read = seq_read,
2238         .llseek = seq_lseek,
2239         .release = single_release_net,
2240 };
2241 #endif
2242
2243 /*
2244  *      Set timeout values for tcp tcpfin udp in the timeout_table.
      *
      *      Each value in @u is multiplied by HZ before it is stored; a value
      *      of 0 leaves the corresponding table entry untouched.  Negative
      *      values and values whose product with HZ would not fit in an int
      *      are rejected.
      *
      *      Returns 0 on success, -EINVAL if any timeout is out of range (in
      *      which case nothing is modified).
2245  */
2246 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2247 {
2248 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2249         struct ip_vs_proto_data *pd;
2250 #endif
2251
2252         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2253                   u->tcp_timeout,
2254                   u->tcp_fin_timeout,
2255                   u->udp_timeout);
2256
             /*
              * The values come straight from userspace: validate all three up
              * front so that "* HZ" below cannot overflow and no partial
              * update happens on bad input.
              */
             if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
                 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ) ||
                 u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
                     return -EINVAL;

2257 #ifdef CONFIG_IP_VS_PROTO_TCP
2258         if (u->tcp_timeout) {
2259                 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2260                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2261                         = u->tcp_timeout * HZ;
2262         }
2263
2264         if (u->tcp_fin_timeout) {
2265                 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2266                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2267                         = u->tcp_fin_timeout * HZ;
2268         }
2269 #endif
2270
2271 #ifdef CONFIG_IP_VS_PROTO_UDP
2272         if (u->udp_timeout) {
2273                 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2274                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2275                         = u->udp_timeout * HZ;
2276         }
2277 #endif
2278         return 0;
2279 }
2280
/*
 * Zero-based index of sockopt command @cmd, used to index set_arglen[].
 * The argument is parenthesized so that expression arguments expand safely.
 */
2281 #define CMDID(cmd)              ((cmd) - IP_VS_BASE_CTL)
2282
/*
 * Argument layout of the *DEST set commands: a service descriptor
 * immediately followed by a destination descriptor.  do_ip_vs_set_ctl()
 * locates the destination part as (usvc_compat + 1).
 */
2283 struct ip_vs_svcdest_user {
2284         struct ip_vs_service_user       s;
2285         struct ip_vs_dest_user          d;
2286 };
2287
/*
 * Exact argument length required for each set command, indexed by
 * CMDID(cmd).  do_ip_vs_set_ctl() rejects any other length with -EINVAL.
 * Commands without an initializer here (e.g. IP_VS_SO_SET_FLUSH) get 0,
 * i.e. they take no argument.  Entries are unsigned char, hence the
 * BUILD_BUG_ON(sizeof(arg) > 255) in do_ip_vs_set_ctl().
 */
2288 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
2289         [CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
2290         [CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
2291         [CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
2292         [CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
2293         [CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
2294         [CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
2295         [CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
2296         [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
2297         [CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
2298         [CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
2299 };
2300
/*
 * One member per set command, typed like that command's argument.  The
 * union is used below only for its size: sizeof() of it is the largest
 * argument any set command can carry.
 */
2301 union ip_vs_set_arglen {
2302         struct ip_vs_service_user       field_IP_VS_SO_SET_ADD;
2303         struct ip_vs_service_user       field_IP_VS_SO_SET_EDIT;
2304         struct ip_vs_service_user       field_IP_VS_SO_SET_DEL;
2305         struct ip_vs_svcdest_user       field_IP_VS_SO_SET_ADDDEST;
2306         struct ip_vs_svcdest_user       field_IP_VS_SO_SET_DELDEST;
2307         struct ip_vs_svcdest_user       field_IP_VS_SO_SET_EDITDEST;
2308         struct ip_vs_timeout_user       field_IP_VS_SO_SET_TIMEOUT;
2309         struct ip_vs_daemon_user        field_IP_VS_SO_SET_STARTDAEMON;
2310         struct ip_vs_daemon_user        field_IP_VS_SO_SET_STOPDAEMON;
2311         struct ip_vs_service_user       field_IP_VS_SO_SET_ZERO;
2312 };
2313
     /* Size of the on-stack argument buffer in do_ip_vs_set_ctl() */
2314 #define MAX_SET_ARGLEN  sizeof(union ip_vs_set_arglen)
2315
/*
 * Expand the userspace compat service struct @usvc_compat into the
 * extended internal form @usvc.  The address family is always AF_INET
 * and the address is stored in addr.ip.
 */
2316 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2317                                   struct ip_vs_service_user *usvc_compat)
2318 {
2319         struct ip_vs_service_user *old = usvc_compat;
2320
2321         /* Members that are not assigned below stay zero */
2322         memset(usvc, 0, sizeof(*usvc));
2323
2324         usvc->af = AF_INET;
2325         usvc->addr.ip = old->addr;
2326         usvc->port = old->port;
2327         usvc->protocol = old->protocol;
2328         usvc->fwmark = old->fwmark;
2329         usvc->netmask = old->netmask;
2330         usvc->timeout = old->timeout;
2331         usvc->flags = old->flags;
2332         usvc->sched_name = old->sched_name;  /* pointer copy, no deep copy needed */
2333 }
2334
/*
 * Expand the userspace compat destination struct @udest_compat into the
 * extended internal form @udest.  The address family is always AF_INET;
 * members not assigned here are left zero by the memset().
 */
2335 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2336                                    struct ip_vs_dest_user *udest_compat)
2337 {
2338         memset(udest, 0, sizeof(*udest));
2339
2340         udest->af = AF_INET;
2341         udest->addr.ip = udest_compat->addr;
2342         udest->port = udest_compat->port;
2343         udest->weight = udest_compat->weight;
2344         udest->conn_flags = udest_compat->conn_flags;
2345         udest->l_threshold = udest_compat->l_threshold;
2346         udest->u_threshold = udest_compat->u_threshold;
2347 }
2348
/*
 * Handler for the IPVS set commands.
 *
 * @sk:   socket the request was issued on; selects the network namespace
 * @cmd:  command, must lie in [IP_VS_BASE_CTL, IP_VS_SO_SET_MAX]
 * @user: userspace argument buffer
 * @len:  length of @user; must equal set_arglen[CMDID(cmd)] exactly
 *
 * The caller needs CAP_NET_ADMIN in the user namespace of the socket's
 * netns.  The argument is copied to a zeroed stack buffer.  The sync
 * daemon commands run under ipvs->sync_mutex (start additionally under
 * rtnl_lock()); every other command runs under __ip_vs_mutex.  Strings
 * taken from the argument are checked for NUL termination before they
 * are used as C strings.
 *
 * Returns 0 or a negative errno: -EPERM (capability), -EINVAL (bad
 * command, length or unterminated string), -EFAULT (copy failure or
 * unsupported protocol), -ESRCH (service not found), -EEXIST (service
 * already present on add), or the result of the invoked operation.
 */
2349 static int
2350 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2351 {
2352         struct net *net = sock_net(sk);
2353         int ret;
             /*
              * Zeroed: service-only commands copy fewer than sizeof(arg) bytes,
              * yet ip_vs_copy_udest_compat() below always reads the trailing
              * destination part.
              */
2354         unsigned char arg[MAX_SET_ARGLEN] = { 0 };
2355         struct ip_vs_service_user *usvc_compat;
2356         struct ip_vs_service_user_kern usvc;
2357         struct ip_vs_service *svc;
2358         struct ip_vs_dest_user *udest_compat;
2359         struct ip_vs_dest_user_kern udest;
2360         struct netns_ipvs *ipvs = net_ipvs(net);
2361
             /* set_arglen[] entries are unsigned char */
2362         BUILD_BUG_ON(sizeof(arg) > 255);
2363         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2364                 return -EPERM;
2365
2366         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2367                 return -EINVAL;
2368         if (len != set_arglen[CMDID(cmd)]) {
2369                 IP_VS_DBG(1, "set_ctl: len %u != %u\n",
2370                           len, set_arglen[CMDID(cmd)]);
2371                 return -EINVAL;
2372         }
2373
2374         if (copy_from_user(arg, user, len) != 0)
2375                 return -EFAULT;
2376
2377         /* increase the module use count */
2378         ip_vs_use_count_inc();
2379
2380         /* Handle daemons since they have another lock */
2381         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2382             cmd == IP_VS_SO_SET_STOPDAEMON) {
2383                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2384
2385                 if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2386                         struct ipvs_sync_daemon_cfg cfg;
2387
                             /*
                              * strlcpy() runs strlen() on its source, so an
                              * interface name without a NUL inside the field
                              * must be refused before the copy.
                              */
                             if (strnlen(dm->mcast_ifn, sizeof(dm->mcast_ifn)) ==
                                 sizeof(dm->mcast_ifn)) {
                                     ret = -EINVAL;
                                     goto out_dec;
                             }

2388                         memset(&cfg, 0, sizeof(cfg));
2389                         strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
2390                                 sizeof(cfg.mcast_ifn));
2391                         cfg.syncid = dm->syncid;
2392                         rtnl_lock();
2393                         mutex_lock(&ipvs->sync_mutex);
2394                         ret = start_sync_thread(ipvs, &cfg, dm->state);
2395                         mutex_unlock(&ipvs->sync_mutex);
2396                         rtnl_unlock();
2397                 } else {
2398                         mutex_lock(&ipvs->sync_mutex);
2399                         ret = stop_sync_thread(ipvs, dm->state);
2400                         mutex_unlock(&ipvs->sync_mutex);
2401                 }
2402                 goto out_dec;
2403         }
2404
2405         mutex_lock(&__ip_vs_mutex);
2406         if (cmd == IP_VS_SO_SET_FLUSH) {
2407                 /* Flush the virtual service */
2408                 ret = ip_vs_flush(ipvs, false);
2409                 goto out_unlock;
2410         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2411                 /* Set timeout values for (tcp tcpfin udp) */
2412                 ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
2413                 goto out_unlock;
2414         }
2415
             /* a destination part, if any, directly follows the service part */
2416         usvc_compat = (struct ip_vs_service_user *)arg;
2417         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2418
2419         /* We only use the new structs internally, so copy userspace compat
2420          * structs to extended internal versions */
2421         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2422         ip_vs_copy_udest_compat(&udest, udest_compat);
2423
2424         if (cmd == IP_VS_SO_SET_ZERO) {
2425                 /* if no service address is set, zero counters in all */
2426                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2427                         ret = ip_vs_zero_all(ipvs);
2428                         goto out_unlock;
2429                 }
2430         }
2431
             /*
              * ADD and EDIT pass the scheduler name on as a C string; it
              * still points into arg[], so insist on a NUL inside the field.
              */
             if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
                 strnlen(usvc.sched_name, sizeof(usvc_compat->sched_name)) ==
                 sizeof(usvc_compat->sched_name)) {
                     ret = -EINVAL;
                     goto out_unlock;
             }

2432         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2433         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2434             usvc.protocol != IPPROTO_SCTP) {
                     /* bounded print: the name may lack a NUL for other commands */
2435                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %.*s\n",
2436                        usvc.protocol, &usvc.addr.ip,
2437                        ntohs(usvc.port),
                            (int)sizeof(usvc_compat->sched_name), usvc.sched_name);
2438                 ret = -EFAULT;
2439                 goto out_unlock;
2440         }
2441
2442         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2443         rcu_read_lock();
2444         if (usvc.fwmark == 0)
2445                 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
2446                                            &usvc.addr, usvc.port);
2447         else
2448                 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
2449         rcu_read_unlock();
2450
             /* everything except ADD needs an existing service of that protocol */
2451         if (cmd != IP_VS_SO_SET_ADD
2452             && (svc == NULL || svc->protocol != usvc.protocol)) {
2453                 ret = -ESRCH;
2454                 goto out_unlock;
2455         }
2456
2457         switch (cmd) {
2458         case IP_VS_SO_SET_ADD:
2459                 if (svc != NULL)
2460                         ret = -EEXIST;
2461                 else
2462                         ret = ip_vs_add_service(ipvs, &usvc, &svc);
2463                 break;
2464         case IP_VS_SO_SET_EDIT:
2465                 ret = ip_vs_edit_service(svc, &usvc);
2466                 break;
2467         case IP_VS_SO_SET_DEL:
2468                 ret = ip_vs_del_service(svc);
2469                 if (!ret)
2470                         goto out_unlock;
2471                 break;
2472         case IP_VS_SO_SET_ZERO:
2473                 ret = ip_vs_zero_service(svc);
2474                 break;
2475         case IP_VS_SO_SET_ADDDEST:
2476                 ret = ip_vs_add_dest(svc, &udest);
2477                 break;
2478         case IP_VS_SO_SET_EDITDEST:
2479                 ret = ip_vs_edit_dest(svc, &udest);
2480                 break;
2481         case IP_VS_SO_SET_DELDEST:
2482                 ret = ip_vs_del_dest(svc, &udest);
2483                 break;
2484         default:
2485                 ret = -EINVAL;
2486         }
2487
2488   out_unlock:
2489         mutex_unlock(&__ip_vs_mutex);
2490   out_dec:
2491         /* decrease the module use count */
2492         ip_vs_use_count_dec();
2493
2494         return ret;
2495 }
2496
2497
/*
 * Fill the sockopt service entry @dst from the kernel service @src.
 * The address is taken from addr.ip, the scheduler name falls back to
 * "none" when no scheduler is attached, and the statistics are exported
 * from a snapshot taken with ip_vs_copy_stats().
 */
2498 static void
2499 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2500 {
2501         struct ip_vs_scheduler *cur;
2502         struct ip_vs_kstats snap;
2503
2504         dst->fwmark = src->fwmark;
2505         dst->protocol = src->protocol;
2506         dst->addr = src->addr.ip;
2507         dst->port = src->port;
2508         dst->flags = src->flags;
2509         dst->netmask = src->netmask;
2510         dst->num_dests = src->num_dests;
2511         dst->timeout = src->timeout / HZ;  /* stored value is divided by HZ */
2512
2513         cur = rcu_dereference_protected(src->scheduler, 1);
2514         strlcpy(dst->sched_name, cur ? cur->name : "none",
2515                 sizeof(dst->sched_name));
2516         ip_vs_copy_stats(&snap, &src->stats);
2517         ip_vs_export_stats_user(&dst->stats, &snap);
2518 }
2519
/*
 * Copy the services of @ipvs to the userspace table @uptr->entrytable.
 *
 * Both service tables are walked (ip_vs_svc_table first, then
 * ip_vs_svc_fwm_table).  Only AF_INET services that belong to @ipvs are
 * exported, each through a zeroed struct ip_vs_service_entry filled by
 * ip_vs_copy_service().  At most get->num_services entries are written;
 * reaching that limit ends the walk without an error.
 *
 * Returns 0, or -EFAULT if a copy_to_user() fails.
 */
2520 static inline int
2521 __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
2522                             const struct ip_vs_get_services *get,
2523                             struct ip_vs_get_services __user *uptr)
2524 {
2525         int idx, count=0;
2526         struct ip_vs_service *svc;
2527         struct ip_vs_service_entry entry;
2528         int ret = 0;
2529
2530         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2531                 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2532                         /* Only expose IPv4 entries to old interface */
2533                         if (svc->af != AF_INET || (svc->ipvs != ipvs))
2534                                 continue;
2535
2536                         if (count >= get->num_services)
2537                                 goto out;
                             /* zero first so no stale stack bytes reach userspace */
2538                         memset(&entry, 0, sizeof(entry));
2539                         ip_vs_copy_service(&entry, svc);
2540                         if (copy_to_user(&uptr->entrytable[count],
2541                                          &entry, sizeof(entry))) {
2542                                 ret = -EFAULT;
2543                                 goto out;
2544                         }
2545                         count++;
2546                 }
2547         }
2548
             /* count carries over: fwmark services follow in the same table */
2549         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2550                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2551                         /* Only expose IPv4 entries to old interface */
2552                         if (svc->af != AF_INET || (svc->ipvs != ipvs))
2553                                 continue;
2554
2555                         if (count >= get->num_services)
2556                                 goto out;
2557                         memset(&entry, 0, sizeof(entry));
2558                         ip_vs_copy_service(&entry, svc);
2559                         if (copy_to_user(&uptr->entrytable[count],
2560                                          &entry, sizeof(entry))) {
2561                                 ret = -EFAULT;
2562                                 goto out;
2563                         }
2564                         count++;
2565                 }
2566         }
2567 out:
2568         return ret;
2569 }
2570
2571 static inline int
2572 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
2573                          struct ip_vs_get_dests __user *uptr)
2574 {
2575         struct ip_vs_service *svc;
2576         union nf_inet_addr addr = { .ip = get->addr };
2577         int ret = 0;
2578
2579         rcu_read_lock();
2580         if (get->fwmark)
2581                 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
2582         else
2583                 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
2584                                            get->port);
2585         rcu_read_unlock();
2586
2587         if (svc) {
2588                 int count = 0;
2589                 struct ip_vs_dest *dest;
2590                 struct ip_vs_dest_entry entry;
2591                 struct ip_vs_kstats kstats;
2592
2593                 memset(&entry, 0, sizeof(entry));
2594                 list_for_each_entry(dest, &svc->destinations, n_list) {
2595                         if (count >= get->num_dests)
2596                                 break;
2597
2598                         /* Cannot expose heterogeneous members via sockopt
2599                          * interface
2600                          */
2601                         if (dest->af != svc->af)
2602                                 continue;
2603
2604                         entry.addr = dest->addr.ip;
2605                         entry.port = dest->port;
2606                         entry.conn_flags = atomic_read(&dest->conn_flags);
2607                         entry.weight = atomic_read(&dest->weight);
2608                         entry.u_threshold = dest->u_threshold;
2609                         entry.l_threshold = dest->l_threshold;
2610                         entry.activeconns = atomic_read(&dest->activeconns);
2611                         entry.inactconns = atomic_read(&dest->inactconns);
2612                         entry.persistconns = atomic_read(&dest->persistconns);
2613                         ip_vs_copy_stats(&kstats, &dest->stats);
2614                         ip_vs_export_stats_user(&entry.stats, &kstats);
2615                         if (copy_to_user(&uptr->entrytable[count],
2616                                          &entry, sizeof(entry))) {
2617                                 ret = -EFAULT;
2618                                 break;
2619                         }
2620                         count++;
2621                 }
2622         } else
2623                 ret = -ESRCH;
2624         return ret;
2625 }
2626
2627 static inline void
2628 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
2629 {
2630 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2631         struct ip_vs_proto_data *pd;
2632 #endif
2633
2634         memset(u, 0, sizeof (*u));
2635
2636 #ifdef CONFIG_IP_VS_PROTO_TCP
2637         pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
2638         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2639         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2640 #endif
2641 #ifdef CONFIG_IP_VS_PROTO_UDP
2642         pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
2643         u->udp_timeout =
2644                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2645 #endif
2646 }
2647
2648 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
2649         [CMDID(IP_VS_SO_GET_VERSION)]  = 64,
2650         [CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
2651         [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
2652         [CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
2653         [CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
2654         [CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
2655         [CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
2656 };
2657
2658 union ip_vs_get_arglen {
2659         char                            field_IP_VS_SO_GET_VERSION[64];
2660         struct ip_vs_getinfo            field_IP_VS_SO_GET_INFO;
2661         struct ip_vs_get_services       field_IP_VS_SO_GET_SERVICES;
2662         struct ip_vs_service_entry      field_IP_VS_SO_GET_SERVICE;
2663         struct ip_vs_get_dests          field_IP_VS_SO_GET_DESTS;
2664         struct ip_vs_timeout_user       field_IP_VS_SO_GET_TIMEOUT;
2665         struct ip_vs_daemon_user        field_IP_VS_SO_GET_DAEMON[2];
2666 };
2667
2668 #define MAX_GET_ARGLEN  sizeof(union ip_vs_get_arglen)
2669
2670 static int
2671 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2672 {
2673         unsigned char arg[MAX_GET_ARGLEN];
2674         int ret = 0;
2675         unsigned int copylen;
2676         struct net *net = sock_net(sk);
2677         struct netns_ipvs *ipvs = net_ipvs(net);
2678
2679         BUG_ON(!net);
2680         BUILD_BUG_ON(sizeof(arg) > 255);
2681         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2682                 return -EPERM;
2683
2684         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2685                 return -EINVAL;
2686
2687         copylen = get_arglen[CMDID(cmd)];
2688         if (*len < (int) copylen) {
2689                 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
2690                 return -EINVAL;
2691         }
2692
2693         if (copy_from_user(arg, user, copylen) != 0)
2694                 return -EFAULT;
2695         /*
2696          * Handle daemons first since it has its own locking
2697          */
2698         if (cmd == IP_VS_SO_GET_DAEMON) {
2699                 struct ip_vs_daemon_user d[2];
2700
2701                 memset(&d, 0, sizeof(d));
2702                 mutex_lock(&ipvs->sync_mutex);
2703                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2704                         d[0].state = IP_VS_STATE_MASTER;
2705                         strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
2706                                 sizeof(d[0].mcast_ifn));
2707                         d[0].syncid = ipvs->mcfg.syncid;
2708                 }
2709                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2710                         d[1].state = IP_VS_STATE_BACKUP;
2711                         strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
2712                                 sizeof(d[1].mcast_ifn));
2713                         d[1].syncid = ipvs->bcfg.syncid;
2714                 }
2715                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2716                         ret = -EFAULT;
2717                 mutex_unlock(&ipvs->sync_mutex);
2718                 return ret;
2719         }
2720
2721         mutex_lock(&__ip_vs_mutex);
2722         switch (cmd) {
2723         case IP_VS_SO_GET_VERSION:
2724         {
2725                 char buf[64];
2726
2727                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2728                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2729                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2730                         ret = -EFAULT;
2731                         goto out;
2732                 }
2733                 *len = strlen(buf)+1;
2734         }
2735         break;
2736
2737         case IP_VS_SO_GET_INFO:
2738         {
2739                 struct ip_vs_getinfo info;
2740                 info.version = IP_VS_VERSION_CODE;
2741                 info.size = ip_vs_conn_tab_size;
2742                 info.num_services = ipvs->num_services;
2743                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2744                         ret = -EFAULT;
2745         }
2746         break;
2747
2748         case IP_VS_SO_GET_SERVICES:
2749         {
2750                 struct ip_vs_get_services *get;
2751                 int size;
2752
2753                 get = (struct ip_vs_get_services *)arg;
2754                 size = sizeof(*get) +
2755                         sizeof(struct ip_vs_service_entry) * get->num_services;
2756                 if (*len != size) {
2757                         pr_err("length: %u != %u\n", *len, size);
2758                         ret = -EINVAL;
2759                         goto out;
2760                 }
2761                 ret = __ip_vs_get_service_entries(ipvs, get, user);
2762         }
2763         break;
2764
2765         case IP_VS_SO_GET_SERVICE:
2766         {
2767                 struct ip_vs_service_entry *entry;
2768                 struct ip_vs_service *svc;
2769                 union nf_inet_addr addr;
2770
2771                 entry = (struct ip_vs_service_entry *)arg;
2772                 addr.ip = entry->addr;
2773                 rcu_read_lock();
2774                 if (entry->fwmark)
2775                         svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
2776                 else
2777                         svc = __ip_vs_service_find(ipvs, AF_INET,
2778                                                    entry->protocol, &addr,
2779                                                    entry->port);
2780                 rcu_read_unlock();
2781                 if (svc) {
2782                         ip_vs_copy_service(entry, svc);
2783                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2784                                 ret = -EFAULT;
2785                 } else
2786                         ret = -ESRCH;
2787         }
2788         break;
2789
2790         case IP_VS_SO_GET_DESTS:
2791         {
2792                 struct ip_vs_get_dests *get;
2793                 int size;
2794
2795                 get = (struct ip_vs_get_dests *)arg;
2796                 size = sizeof(*get) +
2797                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2798                 if (*len != size) {
2799                         pr_err("length: %u != %u\n", *len, size);
2800                         ret = -EINVAL;
2801                         goto out;
2802                 }
2803                 ret = __ip_vs_get_dest_entries(ipvs, get, user);
2804         }
2805         break;
2806
2807         case IP_VS_SO_GET_TIMEOUT:
2808         {
2809                 struct ip_vs_timeout_user t;
2810
2811                 __ip_vs_get_timeouts(ipvs, &t);
2812                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2813                         ret = -EFAULT;
2814         }
2815         break;
2816
2817         default:
2818                 ret = -EINVAL;
2819         }
2820
2821 out:
2822         mutex_unlock(&__ip_vs_mutex);
2823         return ret;
2824 }
2825
2826
2827 static struct nf_sockopt_ops ip_vs_sockopts = {
2828         .pf             = PF_INET,
2829         .set_optmin     = IP_VS_BASE_CTL,
2830         .set_optmax     = IP_VS_SO_SET_MAX+1,
2831         .set            = do_ip_vs_set_ctl,
2832         .get_optmin     = IP_VS_BASE_CTL,
2833         .get_optmax     = IP_VS_SO_GET_MAX+1,
2834         .get            = do_ip_vs_get_ctl,
2835         .owner          = THIS_MODULE,
2836 };
2837
2838 /*
2839  * Generic Netlink interface
2840  */
2841
2842 /* IPVS genetlink family */
2843 static struct genl_family ip_vs_genl_family = {
2844         .id             = GENL_ID_GENERATE,
2845         .hdrsize        = 0,
2846         .name           = IPVS_GENL_NAME,
2847         .version        = IPVS_GENL_VERSION,
2848         .maxattr        = IPVS_CMD_MAX,
2849         .netnsok        = true,         /* Make ipvsadm to work on netns */
2850 };
2851
2852 /* Policy used for first-level command attributes */
2853 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2854         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2855         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2856         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2857         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2858         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2859         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2860 };
2861
2862 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2863 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2864         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2865         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2866                                             .len = IP_VS_IFNAME_MAXLEN },
2867         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2868         [IPVS_DAEMON_ATTR_SYNC_MAXLEN]  = { .type = NLA_U16 },
2869         [IPVS_DAEMON_ATTR_MCAST_GROUP]  = { .type = NLA_U32 },
2870         [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) },
2871         [IPVS_DAEMON_ATTR_MCAST_PORT]   = { .type = NLA_U16 },
2872         [IPVS_DAEMON_ATTR_MCAST_TTL]    = { .type = NLA_U8 },
2873 };
2874
2875 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2876 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2877         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2878         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2879         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2880                                             .len = sizeof(union nf_inet_addr) },
2881         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2882         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2883         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2884                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2885         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2886                                             .len = IP_VS_PENAME_MAXLEN },
2887         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2888                                             .len = sizeof(struct ip_vs_flags) },
2889         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2890         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2891         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2892 };
2893
2894 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2895 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2896         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2897                                             .len = sizeof(union nf_inet_addr) },
2898         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2899         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2900         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2901         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2902         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2903         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2904         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2905         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2906         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2907         [IPVS_DEST_ATTR_ADDR_FAMILY]    = { .type = NLA_U16 },
2908 };
2909
2910 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2911                                  struct ip_vs_kstats *kstats)
2912 {
2913         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2914
2915         if (!nl_stats)
2916                 return -EMSGSIZE;
2917
2918         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
2919             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
2920             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
2921             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
2922                               IPVS_STATS_ATTR_PAD) ||
2923             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
2924                               IPVS_STATS_ATTR_PAD) ||
2925             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
2926             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
2927             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
2928             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
2929             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
2930                 goto nla_put_failure;
2931         nla_nest_end(skb, nl_stats);
2932
2933         return 0;
2934
2935 nla_put_failure:
2936         nla_nest_cancel(skb, nl_stats);
2937         return -EMSGSIZE;
2938 }
2939
2940 static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
2941                                    struct ip_vs_kstats *kstats)
2942 {
2943         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2944
2945         if (!nl_stats)
2946                 return -EMSGSIZE;
2947
2948         if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
2949                               IPVS_STATS_ATTR_PAD) ||
2950             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
2951                               IPVS_STATS_ATTR_PAD) ||
2952             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
2953                               IPVS_STATS_ATTR_PAD) ||
2954             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
2955                               IPVS_STATS_ATTR_PAD) ||
2956             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
2957                               IPVS_STATS_ATTR_PAD) ||
2958             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
2959                               IPVS_STATS_ATTR_PAD) ||
2960             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
2961                               IPVS_STATS_ATTR_PAD) ||
2962             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
2963                               IPVS_STATS_ATTR_PAD) ||
2964             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
2965                               IPVS_STATS_ATTR_PAD) ||
2966             nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
2967                               IPVS_STATS_ATTR_PAD))
2968                 goto nla_put_failure;
2969         nla_nest_end(skb, nl_stats);
2970
2971         return 0;
2972
2973 nla_put_failure:
2974         nla_nest_cancel(skb, nl_stats);
2975         return -EMSGSIZE;
2976 }
2977
2978 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2979                                    struct ip_vs_service *svc)
2980 {
2981         struct ip_vs_scheduler *sched;
2982         struct ip_vs_pe *pe;
2983         struct nlattr *nl_service;
2984         struct ip_vs_flags flags = { .flags = svc->flags,
2985                                      .mask = ~0 };
2986         struct ip_vs_kstats kstats;
2987         char *sched_name;
2988
2989         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2990         if (!nl_service)
2991                 return -EMSGSIZE;
2992
2993         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2994                 goto nla_put_failure;
2995         if (svc->fwmark) {
2996                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2997                         goto nla_put_failure;
2998         } else {
2999                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
3000                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
3001                     nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
3002                         goto nla_put_failure;
3003         }
3004
3005         sched = rcu_dereference_protected(svc->scheduler, 1);
3006         sched_name = sched ? sched->name : "none";
3007         pe = rcu_dereference_protected(svc->pe, 1);
3008         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
3009             (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
3010             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
3011             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
3012             nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
3013                 goto nla_put_failure;
3014         ip_vs_copy_stats(&kstats, &svc->stats);
3015         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
3016                 goto nla_put_failure;
3017         if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
3018                 goto nla_put_failure;
3019
3020         nla_nest_end(skb, nl_service);
3021
3022         return 0;
3023
3024 nla_put_failure:
3025         nla_nest_cancel(skb, nl_service);
3026         return -EMSGSIZE;
3027 }
3028
3029 static int ip_vs_genl_dump_service(struct sk_buff *skb,
3030                                    struct ip_vs_service *svc,
3031                                    struct netlink_callback *cb)
3032 {
3033         void *hdr;
3034
3035         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3036                           &ip_vs_genl_family, NLM_F_MULTI,
3037                           IPVS_CMD_NEW_SERVICE);
3038         if (!hdr)
3039                 return -EMSGSIZE;
3040
3041         if (ip_vs_genl_fill_service(skb, svc) < 0)
3042                 goto nla_put_failure;
3043
3044         genlmsg_end(skb, hdr);
3045         return 0;
3046
3047 nla_put_failure:
3048         genlmsg_cancel(skb, hdr);
3049         return -EMSGSIZE;
3050 }
3051
3052 static int ip_vs_genl_dump_services(struct sk_buff *skb,
3053                                     struct netlink_callback *cb)
3054 {
3055         int idx = 0, i;
3056         int start = cb->args[0];
3057         struct ip_vs_service *svc;
3058         struct net *net = sock_net(skb->sk);
3059         struct netns_ipvs *ipvs = net_ipvs(net);
3060
3061         mutex_lock(&__ip_vs_mutex);
3062         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3063                 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
3064                         if (++idx <= start || (svc->ipvs != ipvs))
3065                                 continue;
3066                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3067                                 idx--;
3068                                 goto nla_put_failure;
3069                         }
3070                 }
3071         }
3072
3073         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
3074                 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
3075                         if (++idx <= start || (svc->ipvs != ipvs))
3076                                 continue;
3077                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
3078                                 idx--;
3079                                 goto nla_put_failure;
3080                         }
3081                 }
3082         }
3083
3084 nla_put_failure:
3085         mutex_unlock(&__ip_vs_mutex);
3086         cb->args[0] = idx;
3087
3088         return skb->len;
3089 }
3090
3091 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
3092                                     struct ip_vs_service_user_kern *usvc,
3093                                     struct nlattr *nla, int full_entry,
3094                                     struct ip_vs_service **ret_svc)
3095 {
3096         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3097         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3098         struct ip_vs_service *svc;
3099
3100         /* Parse mandatory identifying service fields first */
3101         if (nla == NULL ||
3102             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3103                 return -EINVAL;
3104
3105         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3106         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3107         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3108         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3109         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3110
3111         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3112                 return -EINVAL;
3113
3114         memset(usvc, 0, sizeof(*usvc));
3115
3116         usvc->af = nla_get_u16(nla_af);
3117 #ifdef CONFIG_IP_VS_IPV6
3118         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3119 #else
3120         if (usvc->af != AF_INET)
3121 #endif
3122                 return -EAFNOSUPPORT;
3123
3124         if (nla_fwmark) {
3125                 usvc->protocol = IPPROTO_TCP;
3126                 usvc->fwmark = nla_get_u32(nla_fwmark);
3127         } else {
3128                 usvc->protocol = nla_get_u16(nla_protocol);
3129                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3130                 usvc->port = nla_get_be16(nla_port);
3131                 usvc->fwmark = 0;
3132         }
3133
3134         rcu_read_lock();
3135         if (usvc->fwmark)
3136                 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
3137         else
3138                 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
3139                                            &usvc->addr, usvc->port);
3140         rcu_read_unlock();
3141         *ret_svc = svc;
3142
3143         /* If a full entry was requested, check for the additional fields */
3144         if (full_entry) {
3145                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3146                               *nla_netmask;
3147                 struct ip_vs_flags flags;
3148
3149                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3150                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3151                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3152                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3153                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3154
3155                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3156                         return -EINVAL;
3157
3158                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3159
3160                 /* prefill flags from service if it already exists */
3161                 if (svc)
3162                         usvc->flags = svc->flags;
3163
3164                 /* set new flags from userland */
3165                 usvc->flags = (usvc->flags & ~flags.mask) |
3166                               (flags.flags & flags.mask);
3167                 usvc->sched_name = nla_data(nla_sched);
3168                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3169                 usvc->timeout = nla_get_u32(nla_timeout);
3170                 usvc->netmask = nla_get_be32(nla_netmask);
3171         }
3172
3173         return 0;
3174 }
3175
3176 static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
3177                                                      struct nlattr *nla)
3178 {
3179         struct ip_vs_service_user_kern usvc;
3180         struct ip_vs_service *svc;
3181         int ret;
3182
3183         ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
3184         return ret ? ERR_PTR(ret) : svc;
3185 }
3186
3187 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3188 {
3189         struct nlattr *nl_dest;
3190         struct ip_vs_kstats kstats;
3191
3192         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3193         if (!nl_dest)
3194                 return -EMSGSIZE;
3195
3196         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3197             nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3198             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3199                         (atomic_read(&dest->conn_flags) &
3200                          IP_VS_CONN_F_FWD_MASK)) ||
3201             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3202                         atomic_read(&dest->weight)) ||
3203             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3204             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3205             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3206                         atomic_read(&dest->activeconns)) ||
3207             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3208                         atomic_read(&dest->inactconns)) ||
3209             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3210                         atomic_read(&dest->persistconns)) ||
3211             nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
3212                 goto nla_put_failure;
3213         ip_vs_copy_stats(&kstats, &dest->stats);
3214         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
3215                 goto nla_put_failure;
3216         if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
3217                 goto nla_put_failure;
3218
3219         nla_nest_end(skb, nl_dest);
3220
3221         return 0;
3222
3223 nla_put_failure:
3224         nla_nest_cancel(skb, nl_dest);
3225         return -EMSGSIZE;
3226 }
3227
3228 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3229                                 struct netlink_callback *cb)
3230 {
3231         void *hdr;
3232
3233         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3234                           &ip_vs_genl_family, NLM_F_MULTI,
3235                           IPVS_CMD_NEW_DEST);
3236         if (!hdr)
3237                 return -EMSGSIZE;
3238
3239         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3240                 goto nla_put_failure;
3241
3242         genlmsg_end(skb, hdr);
3243         return 0;
3244
3245 nla_put_failure:
3246         genlmsg_cancel(skb, hdr);
3247         return -EMSGSIZE;
3248 }
3249
3250 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3251                                  struct netlink_callback *cb)
3252 {
3253         int idx = 0;
3254         int start = cb->args[0];
3255         struct ip_vs_service *svc;
3256         struct ip_vs_dest *dest;
3257         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3258         struct net *net = sock_net(skb->sk);
3259         struct netns_ipvs *ipvs = net_ipvs(net);
3260
3261         mutex_lock(&__ip_vs_mutex);
3262
3263         /* Try to find the service for which to dump destinations */
3264         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3265                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3266                 goto out_err;
3267
3268
3269         svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
3270         if (IS_ERR(svc) || svc == NULL)
3271                 goto out_err;
3272
3273         /* Dump the destinations */
3274         list_for_each_entry(dest, &svc->destinations, n_list) {
3275                 if (++idx <= start)
3276                         continue;
3277                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3278                         idx--;
3279                         goto nla_put_failure;
3280                 }
3281         }
3282
3283 nla_put_failure:
3284         cb->args[0] = idx;
3285
3286 out_err:
3287         mutex_unlock(&__ip_vs_mutex);
3288
3289         return skb->len;
3290 }
3291
3292 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3293                                  struct nlattr *nla, int full_entry)
3294 {
3295         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3296         struct nlattr *nla_addr, *nla_port;
3297         struct nlattr *nla_addr_family;
3298
3299         /* Parse mandatory identifying destination fields first */
3300         if (nla == NULL ||
3301             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3302                 return -EINVAL;
3303
3304         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3305         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3306         nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
3307
3308         if (!(nla_addr && nla_port))
3309                 return -EINVAL;
3310
3311         memset(udest, 0, sizeof(*udest));
3312
3313         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3314         udest->port = nla_get_be16(nla_port);
3315
3316         if (nla_addr_family)
3317                 udest->af = nla_get_u16(nla_addr_family);
3318         else
3319                 udest->af = 0;
3320
3321         /* If a full entry was requested, check for the additional fields */
3322         if (full_entry) {
3323                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3324                               *nla_l_thresh;
3325
3326                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3327                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3328                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3329                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3330
3331                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3332                         return -EINVAL;
3333
3334                 udest->conn_flags = nla_get_u32(nla_fwd)
3335                                     & IP_VS_CONN_F_FWD_MASK;
3336                 udest->weight = nla_get_u32(nla_weight);
3337                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3338                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3339         }
3340
3341         return 0;
3342 }
3343
3344 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3345                                   struct ipvs_sync_daemon_cfg *c)
3346 {
3347         struct nlattr *nl_daemon;
3348
3349         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3350         if (!nl_daemon)
3351                 return -EMSGSIZE;
3352
3353         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3354             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
3355             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
3356             nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
3357             nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
3358             nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
3359                 goto nla_put_failure;
3360 #ifdef CONFIG_IP_VS_IPV6
3361         if (c->mcast_af == AF_INET6) {
3362                 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
3363                                      &c->mcast_group.in6))
3364                         goto nla_put_failure;
3365         } else
3366 #endif
3367                 if (c->mcast_af == AF_INET &&
3368                     nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
3369                                     c->mcast_group.ip))
3370                         goto nla_put_failure;
3371         nla_nest_end(skb, nl_daemon);
3372
3373         return 0;
3374
3375 nla_put_failure:
3376         nla_nest_cancel(skb, nl_daemon);
3377         return -EMSGSIZE;
3378 }
3379
3380 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3381                                   struct ipvs_sync_daemon_cfg *c,
3382                                   struct netlink_callback *cb)
3383 {
3384         void *hdr;
3385         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3386                           &ip_vs_genl_family, NLM_F_MULTI,
3387                           IPVS_CMD_NEW_DAEMON);
3388         if (!hdr)
3389                 return -EMSGSIZE;
3390
3391         if (ip_vs_genl_fill_daemon(skb, state, c))
3392                 goto nla_put_failure;
3393
3394         genlmsg_end(skb, hdr);
3395         return 0;
3396
3397 nla_put_failure:
3398         genlmsg_cancel(skb, hdr);
3399         return -EMSGSIZE;
3400 }
3401
3402 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3403                                    struct netlink_callback *cb)
3404 {
3405         struct net *net = sock_net(skb->sk);
3406         struct netns_ipvs *ipvs = net_ipvs(net);
3407
3408         mutex_lock(&ipvs->sync_mutex);
3409         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3410                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3411                                            &ipvs->mcfg, cb) < 0)
3412                         goto nla_put_failure;
3413
3414                 cb->args[0] = 1;
3415         }
3416
3417         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3418                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3419                                            &ipvs->bcfg, cb) < 0)
3420                         goto nla_put_failure;
3421
3422                 cb->args[1] = 1;
3423         }
3424
3425 nla_put_failure:
3426         mutex_unlock(&ipvs->sync_mutex);
3427
3428         return skb->len;
3429 }
3430
3431 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3432 {
3433         struct ipvs_sync_daemon_cfg c;
3434         struct nlattr *a;
3435         int ret;
3436
3437         memset(&c, 0, sizeof(c));
3438         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3439               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3440               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3441                 return -EINVAL;
3442         strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3443                 sizeof(c.mcast_ifn));
3444         c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
3445
3446         a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
3447         if (a)
3448                 c.sync_maxlen = nla_get_u16(a);
3449
3450         a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
3451         if (a) {
3452                 c.mcast_af = AF_INET;
3453                 c.mcast_group.ip = nla_get_in_addr(a);
3454                 if (!ipv4_is_multicast(c.mcast_group.ip))
3455                         return -EINVAL;
3456         } else {
3457                 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
3458                 if (a) {
3459 #ifdef CONFIG_IP_VS_IPV6
3460                         int addr_type;
3461
3462                         c.mcast_af = AF_INET6;
3463                         c.mcast_group.in6 = nla_get_in6_addr(a);
3464                         addr_type = ipv6_addr_type(&c.mcast_group.in6);
3465                         if (!(addr_type & IPV6_ADDR_MULTICAST))
3466                                 return -EINVAL;
3467 #else
3468                         return -EAFNOSUPPORT;
3469 #endif
3470                 }
3471         }
3472
3473         a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
3474         if (a)
3475                 c.mcast_port = nla_get_u16(a);
3476
3477         a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
3478         if (a)
3479                 c.mcast_ttl = nla_get_u8(a);
3480
3481         /* The synchronization protocol is incompatible with mixed family
3482          * services
3483          */
3484         if (ipvs->mixed_address_family_dests > 0)
3485                 return -EINVAL;
3486
3487         rtnl_lock();
3488         mutex_lock(&ipvs->sync_mutex);
3489         ret = start_sync_thread(ipvs, &c,
3490                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3491         mutex_unlock(&ipvs->sync_mutex);
3492         rtnl_unlock();
3493         return ret;
3494 }
3495
3496 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
3497 {
3498         int ret;
3499
3500         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3501                 return -EINVAL;
3502
3503         mutex_lock(&ipvs->sync_mutex);
3504         ret = stop_sync_thread(ipvs,
3505                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3506         mutex_unlock(&ipvs->sync_mutex);
3507         return ret;
3508 }
3509
3510 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
3511 {
3512         struct ip_vs_timeout_user t;
3513
3514         __ip_vs_get_timeouts(ipvs, &t);
3515
3516         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3517                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3518
3519         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3520                 t.tcp_fin_timeout =
3521                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3522
3523         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3524                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3525
3526         return ip_vs_set_timeout(ipvs, &t);
3527 }
3528
3529 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3530 {
3531         int ret = -EINVAL, cmd;
3532         struct net *net = sock_net(skb->sk);
3533         struct netns_ipvs *ipvs = net_ipvs(net);
3534
3535         cmd = info->genlhdr->cmd;
3536
3537         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3538                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3539
3540                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3541                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3542                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3543                                      ip_vs_daemon_policy))
3544                         goto out;
3545
3546                 if (cmd == IPVS_CMD_NEW_DAEMON)
3547                         ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
3548                 else
3549                         ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
3550         }
3551
3552 out:
3553         return ret;
3554 }
3555
3556 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3557 {
3558         struct ip_vs_service *svc = NULL;
3559         struct ip_vs_service_user_kern usvc;
3560         struct ip_vs_dest_user_kern udest;
3561         int ret = 0, cmd;
3562         int need_full_svc = 0, need_full_dest = 0;
3563         struct net *net = sock_net(skb->sk);
3564         struct netns_ipvs *ipvs = net_ipvs(net);
3565
3566         cmd = info->genlhdr->cmd;
3567
3568         mutex_lock(&__ip_vs_mutex);
3569
3570         if (cmd == IPVS_CMD_FLUSH) {
3571                 ret = ip_vs_flush(ipvs, false);
3572                 goto out;
3573         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3574                 ret = ip_vs_genl_set_config(ipvs, info->attrs);
3575                 goto out;
3576         } else if (cmd == IPVS_CMD_ZERO &&
3577                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3578                 ret = ip_vs_zero_all(ipvs);
3579                 goto out;
3580         }
3581
3582         /* All following commands require a service argument, so check if we
3583          * received a valid one. We need a full service specification when
3584          * adding / editing a service. Only identifying members otherwise. */
3585         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3586                 need_full_svc = 1;
3587
3588         ret = ip_vs_genl_parse_service(ipvs, &usvc,
3589                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3590                                        need_full_svc, &svc);
3591         if (ret)
3592                 goto out;
3593
3594         /* Unless we're adding a new service, the service must already exist */
3595         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3596                 ret = -ESRCH;
3597                 goto out;
3598         }
3599
3600         /* Destination commands require a valid destination argument. For
3601          * adding / editing a destination, we need a full destination
3602          * specification. */
3603         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3604             cmd == IPVS_CMD_DEL_DEST) {
3605                 if (cmd != IPVS_CMD_DEL_DEST)
3606                         need_full_dest = 1;
3607
3608                 ret = ip_vs_genl_parse_dest(&udest,
3609                                             info->attrs[IPVS_CMD_ATTR_DEST],
3610                                             need_full_dest);
3611                 if (ret)
3612                         goto out;
3613
3614                 /* Old protocols did not allow the user to specify address
3615                  * family, so we set it to zero instead.  We also didn't
3616                  * allow heterogeneous pools in the old code, so it's safe
3617                  * to assume that this will have the same address family as
3618                  * the service.
3619                  */
3620                 if (udest.af == 0)
3621                         udest.af = svc->af;
3622
3623                 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
3624                         /* The synchronization protocol is incompatible
3625                          * with mixed family services
3626                          */
3627                         if (ipvs->sync_state) {
3628                                 ret = -EINVAL;
3629                                 goto out;
3630                         }
3631
3632                         /* Which connection types do we support? */
3633                         switch (udest.conn_flags) {
3634                         case IP_VS_CONN_F_TUNNEL:
3635                                 /* We are able to forward this */
3636                                 break;
3637                         default:
3638                                 ret = -EINVAL;
3639                                 goto out;
3640                         }
3641                 }
3642         }
3643
3644         switch (cmd) {
3645         case IPVS_CMD_NEW_SERVICE:
3646                 if (svc == NULL)
3647                         ret = ip_vs_add_service(ipvs, &usvc, &svc);
3648                 else
3649                         ret = -EEXIST;
3650                 break;
3651         case IPVS_CMD_SET_SERVICE:
3652                 ret = ip_vs_edit_service(svc, &usvc);
3653                 break;
3654         case IPVS_CMD_DEL_SERVICE:
3655                 ret = ip_vs_del_service(svc);
3656                 /* do not use svc, it can be freed */
3657                 break;
3658         case IPVS_CMD_NEW_DEST:
3659                 ret = ip_vs_add_dest(svc, &udest);
3660                 break;
3661         case IPVS_CMD_SET_DEST:
3662                 ret = ip_vs_edit_dest(svc, &udest);
3663                 break;
3664         case IPVS_CMD_DEL_DEST:
3665                 ret = ip_vs_del_dest(svc, &udest);
3666                 break;
3667         case IPVS_CMD_ZERO:
3668                 ret = ip_vs_zero_service(svc);
3669                 break;
3670         default:
3671                 ret = -EINVAL;
3672         }
3673
3674 out:
3675         mutex_unlock(&__ip_vs_mutex);
3676
3677         return ret;
3678 }
3679
3680 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3681 {
3682         struct sk_buff *msg;
3683         void *reply;
3684         int ret, cmd, reply_cmd;
3685         struct net *net = sock_net(skb->sk);
3686         struct netns_ipvs *ipvs = net_ipvs(net);
3687
3688         cmd = info->genlhdr->cmd;
3689
3690         if (cmd == IPVS_CMD_GET_SERVICE)
3691                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3692         else if (cmd == IPVS_CMD_GET_INFO)
3693                 reply_cmd = IPVS_CMD_SET_INFO;
3694         else if (cmd == IPVS_CMD_GET_CONFIG)
3695                 reply_cmd = IPVS_CMD_SET_CONFIG;
3696         else {
3697                 pr_err("unknown Generic Netlink command\n");
3698                 return -EINVAL;
3699         }
3700
3701         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3702         if (!msg)
3703                 return -ENOMEM;
3704
3705         mutex_lock(&__ip_vs_mutex);
3706
3707         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3708         if (reply == NULL)
3709                 goto nla_put_failure;
3710
3711         switch (cmd) {
3712         case IPVS_CMD_GET_SERVICE:
3713         {
3714                 struct ip_vs_service *svc;
3715
3716                 svc = ip_vs_genl_find_service(ipvs,
3717                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3718                 if (IS_ERR(svc)) {
3719                         ret = PTR_ERR(svc);
3720                         goto out_err;
3721                 } else if (svc) {
3722                         ret = ip_vs_genl_fill_service(msg, svc);
3723                         if (ret)
3724                                 goto nla_put_failure;
3725                 } else {
3726                         ret = -ESRCH;
3727                         goto out_err;
3728                 }
3729
3730                 break;
3731         }
3732
3733         case IPVS_CMD_GET_CONFIG:
3734         {
3735                 struct ip_vs_timeout_user t;
3736
3737                 __ip_vs_get_timeouts(ipvs, &t);
3738 #ifdef CONFIG_IP_VS_PROTO_TCP
3739                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3740                                 t.tcp_timeout) ||
3741                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3742                                 t.tcp_fin_timeout))
3743                         goto nla_put_failure;
3744 #endif
3745 #ifdef CONFIG_IP_VS_PROTO_UDP
3746                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3747                         goto nla_put_failure;
3748 #endif
3749
3750                 break;
3751         }
3752
3753         case IPVS_CMD_GET_INFO:
3754                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3755                                 IP_VS_VERSION_CODE) ||
3756                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3757                                 ip_vs_conn_tab_size))
3758                         goto nla_put_failure;
3759                 break;
3760         }
3761
3762         genlmsg_end(msg, reply);
3763         ret = genlmsg_reply(msg, info);
3764         goto out;
3765
3766 nla_put_failure:
3767         pr_err("not enough space in Netlink message\n");
3768         ret = -EMSGSIZE;
3769
3770 out_err:
3771         nlmsg_free(msg);
3772 out:
3773         mutex_unlock(&__ip_vs_mutex);
3774
3775         return ret;
3776 }
3777
3778
3779 static const struct genl_ops ip_vs_genl_ops[] = {
3780         {
3781                 .cmd    = IPVS_CMD_NEW_SERVICE,
3782                 .flags  = GENL_ADMIN_PERM,
3783                 .policy = ip_vs_cmd_policy,
3784                 .doit   = ip_vs_genl_set_cmd,
3785         },
3786         {
3787                 .cmd    = IPVS_CMD_SET_SERVICE,
3788                 .flags  = GENL_ADMIN_PERM,
3789                 .policy = ip_vs_cmd_policy,
3790                 .doit   = ip_vs_genl_set_cmd,
3791         },
3792         {
3793                 .cmd    = IPVS_CMD_DEL_SERVICE,
3794                 .flags  = GENL_ADMIN_PERM,
3795                 .policy = ip_vs_cmd_policy,
3796                 .doit   = ip_vs_genl_set_cmd,
3797         },
3798         {
3799                 .cmd    = IPVS_CMD_GET_SERVICE,
3800                 .flags  = GENL_ADMIN_PERM,
3801                 .doit   = ip_vs_genl_get_cmd,
3802                 .dumpit = ip_vs_genl_dump_services,
3803                 .policy = ip_vs_cmd_policy,
3804         },
3805         {
3806                 .cmd    = IPVS_CMD_NEW_DEST,
3807                 .flags  = GENL_ADMIN_PERM,
3808                 .policy = ip_vs_cmd_policy,
3809                 .doit   = ip_vs_genl_set_cmd,
3810         },
3811         {
3812                 .cmd    = IPVS_CMD_SET_DEST,
3813                 .flags  = GENL_ADMIN_PERM,
3814                 .policy = ip_vs_cmd_policy,
3815                 .doit   = ip_vs_genl_set_cmd,
3816         },
3817         {
3818                 .cmd    = IPVS_CMD_DEL_DEST,
3819                 .flags  = GENL_ADMIN_PERM,
3820                 .policy = ip_vs_cmd_policy,
3821                 .doit   = ip_vs_genl_set_cmd,
3822         },
3823         {
3824                 .cmd    = IPVS_CMD_GET_DEST,
3825                 .flags  = GENL_ADMIN_PERM,
3826                 .policy = ip_vs_cmd_policy,
3827                 .dumpit = ip_vs_genl_dump_dests,
3828         },
3829         {
3830                 .cmd    = IPVS_CMD_NEW_DAEMON,
3831                 .flags  = GENL_ADMIN_PERM,
3832                 .policy = ip_vs_cmd_policy,
3833                 .doit   = ip_vs_genl_set_daemon,
3834         },
3835         {
3836                 .cmd    = IPVS_CMD_DEL_DAEMON,
3837                 .flags  = GENL_ADMIN_PERM,
3838                 .policy = ip_vs_cmd_policy,
3839                 .doit   = ip_vs_genl_set_daemon,
3840         },
3841         {
3842                 .cmd    = IPVS_CMD_GET_DAEMON,
3843                 .flags  = GENL_ADMIN_PERM,
3844                 .dumpit = ip_vs_genl_dump_daemons,
3845         },
3846         {
3847                 .cmd    = IPVS_CMD_SET_CONFIG,
3848                 .flags  = GENL_ADMIN_PERM,
3849                 .policy = ip_vs_cmd_policy,
3850                 .doit   = ip_vs_genl_set_cmd,
3851         },
3852         {
3853                 .cmd    = IPVS_CMD_GET_CONFIG,
3854                 .flags  = GENL_ADMIN_PERM,
3855                 .doit   = ip_vs_genl_get_cmd,
3856         },
3857         {
3858                 .cmd    = IPVS_CMD_GET_INFO,
3859                 .flags  = GENL_ADMIN_PERM,
3860                 .doit   = ip_vs_genl_get_cmd,
3861         },
3862         {
3863                 .cmd    = IPVS_CMD_ZERO,
3864                 .flags  = GENL_ADMIN_PERM,
3865                 .policy = ip_vs_cmd_policy,
3866                 .doit   = ip_vs_genl_set_cmd,
3867         },
3868         {
3869                 .cmd    = IPVS_CMD_FLUSH,
3870                 .flags  = GENL_ADMIN_PERM,
3871                 .doit   = ip_vs_genl_set_cmd,
3872         },
3873 };
3874
3875 static int __init ip_vs_genl_register(void)
3876 {
3877         return genl_register_family_with_ops(&ip_vs_genl_family,
3878                                              ip_vs_genl_ops);
3879 }
3880
/* Unregister the IPVS Generic Netlink family. */
static void ip_vs_genl_unregister(void)
{
        genl_unregister_family(&ip_vs_genl_family);
}
3885
3886 /* End of Generic Netlink interface definitions */
3887
3888 /*
 * per netns init/exit func.
3890  */
3891 #ifdef CONFIG_SYSCTL
3892 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
3893 {
3894         struct net *net = ipvs->net;
3895         int idx;
3896         struct ctl_table *tbl;
3897
3898         atomic_set(&ipvs->dropentry, 0);
3899         spin_lock_init(&ipvs->dropentry_lock);
3900         spin_lock_init(&ipvs->droppacket_lock);
3901         spin_lock_init(&ipvs->securetcp_lock);
3902
3903         if (!net_eq(net, &init_net)) {
3904                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3905                 if (tbl == NULL)
3906                         return -ENOMEM;
3907
3908                 /* Don't export sysctls to unprivileged users */
3909                 if (net->user_ns != &init_user_ns)
3910                         tbl[0].procname = NULL;
3911         } else
3912                 tbl = vs_vars;
3913         /* Initialize sysctl defaults */
3914         for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
3915                 if (tbl[idx].proc_handler == proc_do_defense_mode)
3916                         tbl[idx].extra2 = ipvs;
3917         }
3918         idx = 0;
3919         ipvs->sysctl_amemthresh = 1024;
3920         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3921         ipvs->sysctl_am_droprate = 10;
3922         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3923         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3924         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3925 #ifdef CONFIG_IP_VS_NFCT
3926         tbl[idx++].data = &ipvs->sysctl_conntrack;
3927 #endif
3928         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3929         ipvs->sysctl_snat_reroute = 1;
3930         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3931         ipvs->sysctl_sync_ver = 1;
3932         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3933         ipvs->sysctl_sync_ports = 1;
3934         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3935         tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
3936         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3937         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3938         ipvs->sysctl_sync_sock_size = 0;
3939         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3940         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3941         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3942         tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
3943         tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
3944         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3945         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3946         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3947         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3948         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3949         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3950         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3951         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3952         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3953         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3954         ipvs->sysctl_pmtu_disc = 1;
3955         tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3956         tbl[idx++].data = &ipvs->sysctl_backup_only;
3957         ipvs->sysctl_conn_reuse_mode = 1;
3958         tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
3959         tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
3960         tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
3961
3962         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3963         if (ipvs->sysctl_hdr == NULL) {
3964                 if (!net_eq(net, &init_net))
3965                         kfree(tbl);
3966                 return -ENOMEM;
3967         }
3968         ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
3969         ipvs->sysctl_tbl = tbl;
3970         /* Schedule defense work */
3971         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3972         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3973
3974         return 0;
3975 }
3976
3977 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
3978 {
3979         struct net *net = ipvs->net;
3980
3981         cancel_delayed_work_sync(&ipvs->defense_work);
3982         cancel_work_sync(&ipvs->defense_work.work);
3983         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3984         ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
3985
3986         if (!net_eq(net, &init_net))
3987                 kfree(ipvs->sysctl_tbl);
3988 }
3989
3990 #else
3991
3992 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
3993 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
3994
3995 #endif
3996
/*
 * Notifier block routing events to ip_vs_dst_event(); it is registered
 * as a netdevice notifier in ip_vs_control_init() and unregistered in
 * ip_vs_control_cleanup().
 */
static struct notifier_block ip_vs_dst_notifier = {
        .notifier_call = ip_vs_dst_event,
};
4000
4001 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
4002 {
4003         int i, idx;
4004
4005         /* Initialize rs_table */
4006         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
4007                 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
4008
4009         INIT_LIST_HEAD(&ipvs->dest_trash);
4010         spin_lock_init(&ipvs->dest_trash_lock);
4011         setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
4012                     (unsigned long) ipvs);
4013         atomic_set(&ipvs->ftpsvc_counter, 0);
4014         atomic_set(&ipvs->nullsvc_counter, 0);
4015         atomic_set(&ipvs->conn_out_counter, 0);
4016
4017         /* procfs stats */
4018         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
4019         if (!ipvs->tot_stats.cpustats)
4020                 return -ENOMEM;
4021
4022         for_each_possible_cpu(i) {
4023                 struct ip_vs_cpu_stats *ipvs_tot_stats;
4024                 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i);
4025                 u64_stats_init(&ipvs_tot_stats->syncp);
4026         }
4027
4028         spin_lock_init(&ipvs->tot_stats.lock);
4029
4030         proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
4031         proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
4032         proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
4033                     &ip_vs_stats_percpu_fops);
4034
4035         if (ip_vs_control_net_init_sysctl(ipvs))
4036                 goto err;
4037
4038         return 0;
4039
4040 err:
4041         free_percpu(ipvs->tot_stats.cpustats);
4042         return -ENOMEM;
4043 }
4044
4045 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
4046 {
4047         ip_vs_trash_cleanup(ipvs);
4048         ip_vs_control_net_cleanup_sysctl(ipvs);
4049         remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
4050         remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
4051         remove_proc_entry("ip_vs", ipvs->net->proc_net);
4052         free_percpu(ipvs->tot_stats.cpustats);
4053 }
4054
4055 int __init ip_vs_register_nl_ioctl(void)
4056 {
4057         int ret;
4058
4059         ret = nf_register_sockopt(&ip_vs_sockopts);
4060         if (ret) {
4061                 pr_err("cannot register sockopt.\n");
4062                 goto err_sock;
4063         }
4064
4065         ret = ip_vs_genl_register();
4066         if (ret) {
4067                 pr_err("cannot register Generic Netlink interface.\n");
4068                 goto err_genl;
4069         }
4070         return 0;
4071
4072 err_genl:
4073         nf_unregister_sockopt(&ip_vs_sockopts);
4074 err_sock:
4075         return ret;
4076 }
4077
/*
 * Unregister the user-space control interfaces in the reverse order of
 * ip_vs_register_nl_ioctl(): Generic Netlink first, then the sockopts.
 */
void ip_vs_unregister_nl_ioctl(void)
{
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
}
4083
4084 int __init ip_vs_control_init(void)
4085 {
4086         int idx;
4087         int ret;
4088
4089         EnterFunction(2);
4090
4091         /* Initialize svc_table, ip_vs_svc_fwm_table */
4092         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
4093                 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
4094                 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
4095         }
4096
4097         smp_wmb();      /* Do we really need it now ? */
4098
4099         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
4100         if (ret < 0)
4101                 return ret;
4102
4103         LeaveFunction(2);
4104         return 0;
4105 }
4106
4107
/*
 * Module-wide teardown of the control part: unregister the netdevice
 * notifier that ip_vs_control_init() registered.
 */
void ip_vs_control_cleanup(void)
{
        EnterFunction(2);
        unregister_netdevice_notifier(&ip_vs_dst_notifier);
        LeaveFunction(2);
}