netfilter: nfnetlink: silence warning if CONFIG_PROVE_RCU isn't set
[cascardo/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/*
 * Read attribute @attr from the per-namespace default devconf of @net.
 * @net is parenthesized so that any pointer expression (for example
 * "&some_net") can be passed, not only a plain identifier or call.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
98 };
99
100 #define IN4_ADDR_HSIZE_SHIFT    8
101 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
102
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142         struct hlist_node *node;
143
144         rcu_read_lock();
145         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
146                 if (ifa->ifa_local == addr) {
147                         struct net_device *dev = ifa->ifa_dev->dev;
148
149                         if (!net_eq(dev_net(dev), net))
150                                 continue;
151                         result = dev;
152                         break;
153                 }
154         }
155         if (!result) {
156                 struct flowi4 fl4 = { .daddr = addr };
157                 struct fib_result res = { 0 };
158                 struct fib_table *local;
159
160                 /* Fallback to FIB local table so that communication
161                  * over loopback subnets work.
162                  */
163                 local = fib_get_table(net, RT_TABLE_LOCAL);
164                 if (local &&
165                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
166                     res.type == RTN_LOCAL)
167                         result = FIB_RES_DEV(res);
168         }
169         if (result && devref)
170                 dev_hold(result);
171         rcu_read_unlock();
172         return result;
173 }
174 EXPORT_SYMBOL(__ip_dev_find);
175
176 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
177
178 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
179 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
180                          int destroy);
181 #ifdef CONFIG_SYSCTL
182 static void devinet_sysctl_register(struct in_device *idev);
183 static void devinet_sysctl_unregister(struct in_device *idev);
184 #else
185 static void devinet_sysctl_register(struct in_device *idev)
186 {
187 }
188 static void devinet_sysctl_unregister(struct in_device *idev)
189 {
190 }
191 #endif
192
193 /* Locks all the inet devices. */
194
195 static struct in_ifaddr *inet_alloc_ifa(void)
196 {
197         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
198 }
199
200 static void inet_rcu_free_ifa(struct rcu_head *head)
201 {
202         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
203         if (ifa->ifa_dev)
204                 in_dev_put(ifa->ifa_dev);
205         kfree(ifa);
206 }
207
208 static void inet_free_ifa(struct in_ifaddr *ifa)
209 {
210         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
211 }
212
213 void in_dev_finish_destroy(struct in_device *idev)
214 {
215         struct net_device *dev = idev->dev;
216
217         WARN_ON(idev->ifa_list);
218         WARN_ON(idev->mc_list);
219 #ifdef NET_REFCNT_DEBUG
220         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
221 #endif
222         dev_put(dev);
223         if (!idev->dead)
224                 pr_err("Freeing alive in_device %p\n", idev);
225         else
226                 kfree(idev);
227 }
228 EXPORT_SYMBOL(in_dev_finish_destroy);
229
230 static struct in_device *inetdev_init(struct net_device *dev)
231 {
232         struct in_device *in_dev;
233
234         ASSERT_RTNL();
235
236         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
237         if (!in_dev)
238                 goto out;
239         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
240                         sizeof(in_dev->cnf));
241         in_dev->cnf.sysctl = NULL;
242         in_dev->dev = dev;
243         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
244         if (!in_dev->arp_parms)
245                 goto out_kfree;
246         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
247                 dev_disable_lro(dev);
248         /* Reference in_dev->dev */
249         dev_hold(dev);
250         /* Account for reference dev->ip_ptr (below) */
251         in_dev_hold(in_dev);
252
253         devinet_sysctl_register(in_dev);
254         ip_mc_init_dev(in_dev);
255         if (dev->flags & IFF_UP)
256                 ip_mc_up(in_dev);
257
258         /* we can receive as soon as ip_ptr is set -- do this last */
259         rcu_assign_pointer(dev->ip_ptr, in_dev);
260 out:
261         return in_dev;
262 out_kfree:
263         kfree(in_dev);
264         in_dev = NULL;
265         goto out;
266 }
267
268 static void in_dev_rcu_put(struct rcu_head *head)
269 {
270         struct in_device *idev = container_of(head, struct in_device, rcu_head);
271         in_dev_put(idev);
272 }
273
274 static void inetdev_destroy(struct in_device *in_dev)
275 {
276         struct in_ifaddr *ifa;
277         struct net_device *dev;
278
279         ASSERT_RTNL();
280
281         dev = in_dev->dev;
282
283         in_dev->dead = 1;
284
285         ip_mc_destroy_dev(in_dev);
286
287         while ((ifa = in_dev->ifa_list) != NULL) {
288                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
289                 inet_free_ifa(ifa);
290         }
291
292         RCU_INIT_POINTER(dev->ip_ptr, NULL);
293
294         devinet_sysctl_unregister(in_dev);
295         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
296         arp_ifdown(dev);
297
298         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
299 }
300
301 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
302 {
303         rcu_read_lock();
304         for_primary_ifa(in_dev) {
305                 if (inet_ifa_match(a, ifa)) {
306                         if (!b || inet_ifa_match(b, ifa)) {
307                                 rcu_read_unlock();
308                                 return 1;
309                         }
310                 }
311         } endfor_ifa(in_dev);
312         rcu_read_unlock();
313         return 0;
314 }
315
316 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
317                          int destroy, struct nlmsghdr *nlh, u32 portid)
318 {
319         struct in_ifaddr *promote = NULL;
320         struct in_ifaddr *ifa, *ifa1 = *ifap;
321         struct in_ifaddr *last_prim = in_dev->ifa_list;
322         struct in_ifaddr *prev_prom = NULL;
323         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
324
325         ASSERT_RTNL();
326
327         /* 1. Deleting primary ifaddr forces deletion all secondaries
328          * unless alias promotion is set
329          **/
330
331         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
332                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
333
334                 while ((ifa = *ifap1) != NULL) {
335                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
336                             ifa1->ifa_scope <= ifa->ifa_scope)
337                                 last_prim = ifa;
338
339                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
340                             ifa1->ifa_mask != ifa->ifa_mask ||
341                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
342                                 ifap1 = &ifa->ifa_next;
343                                 prev_prom = ifa;
344                                 continue;
345                         }
346
347                         if (!do_promote) {
348                                 inet_hash_remove(ifa);
349                                 *ifap1 = ifa->ifa_next;
350
351                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
352                                 blocking_notifier_call_chain(&inetaddr_chain,
353                                                 NETDEV_DOWN, ifa);
354                                 inet_free_ifa(ifa);
355                         } else {
356                                 promote = ifa;
357                                 break;
358                         }
359                 }
360         }
361
362         /* On promotion all secondaries from subnet are changing
363          * the primary IP, we must remove all their routes silently
364          * and later to add them back with new prefsrc. Do this
365          * while all addresses are on the device list.
366          */
367         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
368                 if (ifa1->ifa_mask == ifa->ifa_mask &&
369                     inet_ifa_match(ifa1->ifa_address, ifa))
370                         fib_del_ifaddr(ifa, ifa1);
371         }
372
373         /* 2. Unlink it */
374
375         *ifap = ifa1->ifa_next;
376         inet_hash_remove(ifa1);
377
378         /* 3. Announce address deletion */
379
380         /* Send message first, then call notifier.
381            At first sight, FIB update triggered by notifier
382            will refer to already deleted ifaddr, that could confuse
383            netlink listeners. It is not true: look, gated sees
384            that route deleted and if it still thinks that ifaddr
385            is valid, it will try to restore deleted routes... Grr.
386            So that, this order is correct.
387          */
388         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
389         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
390
391         if (promote) {
392                 struct in_ifaddr *next_sec = promote->ifa_next;
393
394                 if (prev_prom) {
395                         prev_prom->ifa_next = promote->ifa_next;
396                         promote->ifa_next = last_prim->ifa_next;
397                         last_prim->ifa_next = promote;
398                 }
399
400                 promote->ifa_flags &= ~IFA_F_SECONDARY;
401                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
402                 blocking_notifier_call_chain(&inetaddr_chain,
403                                 NETDEV_UP, promote);
404                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
405                         if (ifa1->ifa_mask != ifa->ifa_mask ||
406                             !inet_ifa_match(ifa1->ifa_address, ifa))
407                                         continue;
408                         fib_add_ifaddr(ifa);
409                 }
410
411         }
412         if (destroy)
413                 inet_free_ifa(ifa1);
414 }
415
416 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
417                          int destroy)
418 {
419         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
420 }
421
422 static void check_lifetime(struct work_struct *work);
423
424 static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
425
426 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
427                              u32 portid)
428 {
429         struct in_device *in_dev = ifa->ifa_dev;
430         struct in_ifaddr *ifa1, **ifap, **last_primary;
431
432         ASSERT_RTNL();
433
434         if (!ifa->ifa_local) {
435                 inet_free_ifa(ifa);
436                 return 0;
437         }
438
439         ifa->ifa_flags &= ~IFA_F_SECONDARY;
440         last_primary = &in_dev->ifa_list;
441
442         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
443              ifap = &ifa1->ifa_next) {
444                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
445                     ifa->ifa_scope <= ifa1->ifa_scope)
446                         last_primary = &ifa1->ifa_next;
447                 if (ifa1->ifa_mask == ifa->ifa_mask &&
448                     inet_ifa_match(ifa1->ifa_address, ifa)) {
449                         if (ifa1->ifa_local == ifa->ifa_local) {
450                                 inet_free_ifa(ifa);
451                                 return -EEXIST;
452                         }
453                         if (ifa1->ifa_scope != ifa->ifa_scope) {
454                                 inet_free_ifa(ifa);
455                                 return -EINVAL;
456                         }
457                         ifa->ifa_flags |= IFA_F_SECONDARY;
458                 }
459         }
460
461         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
462                 net_srandom(ifa->ifa_local);
463                 ifap = last_primary;
464         }
465
466         ifa->ifa_next = *ifap;
467         *ifap = ifa;
468
469         inet_hash_insert(dev_net(in_dev->dev), ifa);
470
471         cancel_delayed_work(&check_lifetime_work);
472         schedule_delayed_work(&check_lifetime_work, 0);
473
474         /* Send message first, then call notifier.
475            Notifier will trigger FIB update, so that
476            listeners of netlink will know about new ifaddr */
477         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
478         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
479
480         return 0;
481 }
482
483 static int inet_insert_ifa(struct in_ifaddr *ifa)
484 {
485         return __inet_insert_ifa(ifa, NULL, 0);
486 }
487
488 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
489 {
490         struct in_device *in_dev = __in_dev_get_rtnl(dev);
491
492         ASSERT_RTNL();
493
494         if (!in_dev) {
495                 inet_free_ifa(ifa);
496                 return -ENOBUFS;
497         }
498         ipv4_devconf_setall(in_dev);
499         if (ifa->ifa_dev != in_dev) {
500                 WARN_ON(ifa->ifa_dev);
501                 in_dev_hold(in_dev);
502                 ifa->ifa_dev = in_dev;
503         }
504         if (ipv4_is_loopback(ifa->ifa_local))
505                 ifa->ifa_scope = RT_SCOPE_HOST;
506         return inet_insert_ifa(ifa);
507 }
508
509 /* Caller must hold RCU or RTNL :
510  * We dont take a reference on found in_device
511  */
512 struct in_device *inetdev_by_index(struct net *net, int ifindex)
513 {
514         struct net_device *dev;
515         struct in_device *in_dev = NULL;
516
517         rcu_read_lock();
518         dev = dev_get_by_index_rcu(net, ifindex);
519         if (dev)
520                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
521         rcu_read_unlock();
522         return in_dev;
523 }
524 EXPORT_SYMBOL(inetdev_by_index);
525
526 /* Called only from RTNL semaphored context. No locks. */
527
528 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
529                                     __be32 mask)
530 {
531         ASSERT_RTNL();
532
533         for_primary_ifa(in_dev) {
534                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
535                         return ifa;
536         } endfor_ifa(in_dev);
537         return NULL;
538 }
539
540 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
541 {
542         struct net *net = sock_net(skb->sk);
543         struct nlattr *tb[IFA_MAX+1];
544         struct in_device *in_dev;
545         struct ifaddrmsg *ifm;
546         struct in_ifaddr *ifa, **ifap;
547         int err = -EINVAL;
548
549         ASSERT_RTNL();
550
551         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
552         if (err < 0)
553                 goto errout;
554
555         ifm = nlmsg_data(nlh);
556         in_dev = inetdev_by_index(net, ifm->ifa_index);
557         if (in_dev == NULL) {
558                 err = -ENODEV;
559                 goto errout;
560         }
561
562         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
563              ifap = &ifa->ifa_next) {
564                 if (tb[IFA_LOCAL] &&
565                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
566                         continue;
567
568                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
569                         continue;
570
571                 if (tb[IFA_ADDRESS] &&
572                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
573                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
574                         continue;
575
576                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
577                 return 0;
578         }
579
580         err = -EADDRNOTAVAIL;
581 errout:
582         return err;
583 }
584
585 #define INFINITY_LIFE_TIME      0xFFFFFFFF
586
587 static void check_lifetime(struct work_struct *work)
588 {
589         unsigned long now, next, next_sec, next_sched;
590         struct in_ifaddr *ifa;
591         struct hlist_node *node;
592         int i;
593
594         now = jiffies;
595         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
596
597         rcu_read_lock();
598         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
599                 hlist_for_each_entry_rcu(ifa, node,
600                                          &inet_addr_lst[i], hash) {
601                         unsigned long age;
602
603                         if (ifa->ifa_flags & IFA_F_PERMANENT)
604                                 continue;
605
606                         /* We try to batch several events at once. */
607                         age = (now - ifa->ifa_tstamp +
608                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
609
610                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
611                             age >= ifa->ifa_valid_lft) {
612                                 struct in_ifaddr **ifap ;
613
614                                 rtnl_lock();
615                                 for (ifap = &ifa->ifa_dev->ifa_list;
616                                      *ifap != NULL; ifap = &ifa->ifa_next) {
617                                         if (*ifap == ifa)
618                                                 inet_del_ifa(ifa->ifa_dev,
619                                                              ifap, 1);
620                                 }
621                                 rtnl_unlock();
622                         } else if (ifa->ifa_preferred_lft ==
623                                    INFINITY_LIFE_TIME) {
624                                 continue;
625                         } else if (age >= ifa->ifa_preferred_lft) {
626                                 if (time_before(ifa->ifa_tstamp +
627                                                 ifa->ifa_valid_lft * HZ, next))
628                                         next = ifa->ifa_tstamp +
629                                                ifa->ifa_valid_lft * HZ;
630
631                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
632                                         ifa->ifa_flags |= IFA_F_DEPRECATED;
633                                         rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
634                                 }
635                         } else if (time_before(ifa->ifa_tstamp +
636                                                ifa->ifa_preferred_lft * HZ,
637                                                next)) {
638                                 next = ifa->ifa_tstamp +
639                                        ifa->ifa_preferred_lft * HZ;
640                         }
641                 }
642         }
643         rcu_read_unlock();
644
645         next_sec = round_jiffies_up(next);
646         next_sched = next;
647
648         /* If rounded timeout is accurate enough, accept it. */
649         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
650                 next_sched = next_sec;
651
652         now = jiffies;
653         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
654         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
655                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
656
657         schedule_delayed_work(&check_lifetime_work, next_sched - now);
658 }
659
660 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
661                              __u32 prefered_lft)
662 {
663         unsigned long timeout;
664
665         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
666
667         timeout = addrconf_timeout_fixup(valid_lft, HZ);
668         if (addrconf_finite_timeout(timeout))
669                 ifa->ifa_valid_lft = timeout;
670         else
671                 ifa->ifa_flags |= IFA_F_PERMANENT;
672
673         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
674         if (addrconf_finite_timeout(timeout)) {
675                 if (timeout == 0)
676                         ifa->ifa_flags |= IFA_F_DEPRECATED;
677                 ifa->ifa_preferred_lft = timeout;
678         }
679         ifa->ifa_tstamp = jiffies;
680         if (!ifa->ifa_cstamp)
681                 ifa->ifa_cstamp = ifa->ifa_tstamp;
682 }
683
684 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
685                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
686 {
687         struct nlattr *tb[IFA_MAX+1];
688         struct in_ifaddr *ifa;
689         struct ifaddrmsg *ifm;
690         struct net_device *dev;
691         struct in_device *in_dev;
692         int err;
693
694         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
695         if (err < 0)
696                 goto errout;
697
698         ifm = nlmsg_data(nlh);
699         err = -EINVAL;
700         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
701                 goto errout;
702
703         dev = __dev_get_by_index(net, ifm->ifa_index);
704         err = -ENODEV;
705         if (dev == NULL)
706                 goto errout;
707
708         in_dev = __in_dev_get_rtnl(dev);
709         err = -ENOBUFS;
710         if (in_dev == NULL)
711                 goto errout;
712
713         ifa = inet_alloc_ifa();
714         if (ifa == NULL)
715                 /*
716                  * A potential indev allocation can be left alive, it stays
717                  * assigned to its device and is destroy with it.
718                  */
719                 goto errout;
720
721         ipv4_devconf_setall(in_dev);
722         in_dev_hold(in_dev);
723
724         if (tb[IFA_ADDRESS] == NULL)
725                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
726
727         INIT_HLIST_NODE(&ifa->hash);
728         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
729         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
730         ifa->ifa_flags = ifm->ifa_flags;
731         ifa->ifa_scope = ifm->ifa_scope;
732         ifa->ifa_dev = in_dev;
733
734         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
735         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
736
737         if (tb[IFA_BROADCAST])
738                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
739
740         if (tb[IFA_LABEL])
741                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
742         else
743                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
744
745         if (tb[IFA_CACHEINFO]) {
746                 struct ifa_cacheinfo *ci;
747
748                 ci = nla_data(tb[IFA_CACHEINFO]);
749                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
750                         err = -EINVAL;
751                         goto errout;
752                 }
753                 *pvalid_lft = ci->ifa_valid;
754                 *pprefered_lft = ci->ifa_prefered;
755         }
756
757         return ifa;
758
759 errout:
760         return ERR_PTR(err);
761 }
762
763 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
764 {
765         struct in_device *in_dev = ifa->ifa_dev;
766         struct in_ifaddr *ifa1, **ifap;
767
768         if (!ifa->ifa_local)
769                 return NULL;
770
771         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
772              ifap = &ifa1->ifa_next) {
773                 if (ifa1->ifa_mask == ifa->ifa_mask &&
774                     inet_ifa_match(ifa1->ifa_address, ifa) &&
775                     ifa1->ifa_local == ifa->ifa_local)
776                         return ifa1;
777         }
778         return NULL;
779 }
780
781 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
782 {
783         struct net *net = sock_net(skb->sk);
784         struct in_ifaddr *ifa;
785         struct in_ifaddr *ifa_existing;
786         __u32 valid_lft = INFINITY_LIFE_TIME;
787         __u32 prefered_lft = INFINITY_LIFE_TIME;
788
789         ASSERT_RTNL();
790
791         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
792         if (IS_ERR(ifa))
793                 return PTR_ERR(ifa);
794
795         ifa_existing = find_matching_ifa(ifa);
796         if (!ifa_existing) {
797                 /* It would be best to check for !NLM_F_CREATE here but
798                  * userspace alreay relies on not having to provide this.
799                  */
800                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
801                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
802         } else {
803                 inet_free_ifa(ifa);
804
805                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
806                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
807                         return -EEXIST;
808
809                 set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
810         }
811         return 0;
812 }
813
814 /*
815  *      Determine a default network mask, based on the IP address.
816  */
817
818 static int inet_abc_len(__be32 addr)
819 {
820         int rc = -1;    /* Something else, probably a multicast. */
821
822         if (ipv4_is_zeronet(addr))
823                 rc = 0;
824         else {
825                 __u32 haddr = ntohl(addr);
826
827                 if (IN_CLASSA(haddr))
828                         rc = 8;
829                 else if (IN_CLASSB(haddr))
830                         rc = 16;
831                 else if (IN_CLASSC(haddr))
832                         rc = 24;
833         }
834
835         return rc;
836 }
837
838
839 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
840 {
841         struct ifreq ifr;
842         struct sockaddr_in sin_orig;
843         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
844         struct in_device *in_dev;
845         struct in_ifaddr **ifap = NULL;
846         struct in_ifaddr *ifa = NULL;
847         struct net_device *dev;
848         char *colon;
849         int ret = -EFAULT;
850         int tryaddrmatch = 0;
851
852         /*
853          *      Fetch the caller's info block into kernel space
854          */
855
856         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
857                 goto out;
858         ifr.ifr_name[IFNAMSIZ - 1] = 0;
859
860         /* save original address for comparison */
861         memcpy(&sin_orig, sin, sizeof(*sin));
862
863         colon = strchr(ifr.ifr_name, ':');
864         if (colon)
865                 *colon = 0;
866
867         dev_load(net, ifr.ifr_name);
868
869         switch (cmd) {
870         case SIOCGIFADDR:       /* Get interface address */
871         case SIOCGIFBRDADDR:    /* Get the broadcast address */
872         case SIOCGIFDSTADDR:    /* Get the destination address */
873         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
874                 /* Note that these ioctls will not sleep,
875                    so that we do not impose a lock.
876                    One day we will be forced to put shlock here (I mean SMP)
877                  */
878                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
879                 memset(sin, 0, sizeof(*sin));
880                 sin->sin_family = AF_INET;
881                 break;
882
883         case SIOCSIFFLAGS:
884                 ret = -EPERM;
885                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
886                         goto out;
887                 break;
888         case SIOCSIFADDR:       /* Set interface address (and family) */
889         case SIOCSIFBRDADDR:    /* Set the broadcast address */
890         case SIOCSIFDSTADDR:    /* Set the destination address */
891         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
892                 ret = -EPERM;
893                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
894                         goto out;
895                 ret = -EINVAL;
896                 if (sin->sin_family != AF_INET)
897                         goto out;
898                 break;
899         default:
900                 ret = -EINVAL;
901                 goto out;
902         }
903
904         rtnl_lock();
905
906         ret = -ENODEV;
907         dev = __dev_get_by_name(net, ifr.ifr_name);
908         if (!dev)
909                 goto done;
910
911         if (colon)
912                 *colon = ':';
913
914         in_dev = __in_dev_get_rtnl(dev);
915         if (in_dev) {
916                 if (tryaddrmatch) {
917                         /* Matthias Andree */
918                         /* compare label and address (4.4BSD style) */
919                         /* note: we only do this for a limited set of ioctls
920                            and only if the original address family was AF_INET.
921                            This is checked above. */
922                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
923                              ifap = &ifa->ifa_next) {
924                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
925                                     sin_orig.sin_addr.s_addr ==
926                                                         ifa->ifa_local) {
927                                         break; /* found */
928                                 }
929                         }
930                 }
931                 /* we didn't get a match, maybe the application is
932                    4.3BSD-style and passed in junk so we fall back to
933                    comparing just the label */
934                 if (!ifa) {
935                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
936                              ifap = &ifa->ifa_next)
937                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
938                                         break;
939                 }
940         }
941
942         ret = -EADDRNOTAVAIL;
943         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
944                 goto done;
945
946         switch (cmd) {
947         case SIOCGIFADDR:       /* Get interface address */
948                 sin->sin_addr.s_addr = ifa->ifa_local;
949                 goto rarok;
950
951         case SIOCGIFBRDADDR:    /* Get the broadcast address */
952                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
953                 goto rarok;
954
955         case SIOCGIFDSTADDR:    /* Get the destination address */
956                 sin->sin_addr.s_addr = ifa->ifa_address;
957                 goto rarok;
958
959         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
960                 sin->sin_addr.s_addr = ifa->ifa_mask;
961                 goto rarok;
962
963         case SIOCSIFFLAGS:
964                 if (colon) {
965                         ret = -EADDRNOTAVAIL;
966                         if (!ifa)
967                                 break;
968                         ret = 0;
969                         if (!(ifr.ifr_flags & IFF_UP))
970                                 inet_del_ifa(in_dev, ifap, 1);
971                         break;
972                 }
973                 ret = dev_change_flags(dev, ifr.ifr_flags);
974                 break;
975
976         case SIOCSIFADDR:       /* Set interface address (and family) */
977                 ret = -EINVAL;
978                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
979                         break;
980
981                 if (!ifa) {
982                         ret = -ENOBUFS;
983                         ifa = inet_alloc_ifa();
984                         if (!ifa)
985                                 break;
986                         INIT_HLIST_NODE(&ifa->hash);
987                         if (colon)
988                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
989                         else
990                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
991                 } else {
992                         ret = 0;
993                         if (ifa->ifa_local == sin->sin_addr.s_addr)
994                                 break;
995                         inet_del_ifa(in_dev, ifap, 0);
996                         ifa->ifa_broadcast = 0;
997                         ifa->ifa_scope = 0;
998                 }
999
1000                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1001
1002                 if (!(dev->flags & IFF_POINTOPOINT)) {
1003                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1004                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1005                         if ((dev->flags & IFF_BROADCAST) &&
1006                             ifa->ifa_prefixlen < 31)
1007                                 ifa->ifa_broadcast = ifa->ifa_address |
1008                                                      ~ifa->ifa_mask;
1009                 } else {
1010                         ifa->ifa_prefixlen = 32;
1011                         ifa->ifa_mask = inet_make_mask(32);
1012                 }
1013                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1014                 ret = inet_set_ifa(dev, ifa);
1015                 break;
1016
1017         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1018                 ret = 0;
1019                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1020                         inet_del_ifa(in_dev, ifap, 0);
1021                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1022                         inet_insert_ifa(ifa);
1023                 }
1024                 break;
1025
1026         case SIOCSIFDSTADDR:    /* Set the destination address */
1027                 ret = 0;
1028                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1029                         break;
1030                 ret = -EINVAL;
1031                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1032                         break;
1033                 ret = 0;
1034                 inet_del_ifa(in_dev, ifap, 0);
1035                 ifa->ifa_address = sin->sin_addr.s_addr;
1036                 inet_insert_ifa(ifa);
1037                 break;
1038
1039         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1040
1041                 /*
1042                  *      The mask we set must be legal.
1043                  */
1044                 ret = -EINVAL;
1045                 if (bad_mask(sin->sin_addr.s_addr, 0))
1046                         break;
1047                 ret = 0;
1048                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1049                         __be32 old_mask = ifa->ifa_mask;
1050                         inet_del_ifa(in_dev, ifap, 0);
1051                         ifa->ifa_mask = sin->sin_addr.s_addr;
1052                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1053
1054                         /* See if current broadcast address matches
1055                          * with current netmask, then recalculate
1056                          * the broadcast address. Otherwise it's a
1057                          * funny address, so don't touch it since
1058                          * the user seems to know what (s)he's doing...
1059                          */
1060                         if ((dev->flags & IFF_BROADCAST) &&
1061                             (ifa->ifa_prefixlen < 31) &&
1062                             (ifa->ifa_broadcast ==
1063                              (ifa->ifa_local|~old_mask))) {
1064                                 ifa->ifa_broadcast = (ifa->ifa_local |
1065                                                       ~sin->sin_addr.s_addr);
1066                         }
1067                         inet_insert_ifa(ifa);
1068                 }
1069                 break;
1070         }
1071 done:
1072         rtnl_unlock();
1073 out:
1074         return ret;
1075 rarok:
1076         rtnl_unlock();
1077         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1078         goto out;
1079 }
1080
1081 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1082 {
1083         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1084         struct in_ifaddr *ifa;
1085         struct ifreq ifr;
1086         int done = 0;
1087
1088         if (!in_dev)
1089                 goto out;
1090
1091         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092                 if (!buf) {
1093                         done += sizeof(ifr);
1094                         continue;
1095                 }
1096                 if (len < (int) sizeof(ifr))
1097                         break;
1098                 memset(&ifr, 0, sizeof(struct ifreq));
1099                 if (ifa->ifa_label)
1100                         strcpy(ifr.ifr_name, ifa->ifa_label);
1101                 else
1102                         strcpy(ifr.ifr_name, dev->name);
1103
1104                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1105                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1106                                                                 ifa->ifa_local;
1107
1108                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1109                         done = -EFAULT;
1110                         break;
1111                 }
1112                 buf  += sizeof(struct ifreq);
1113                 len  -= sizeof(struct ifreq);
1114                 done += sizeof(struct ifreq);
1115         }
1116 out:
1117         return done;
1118 }
1119
1120 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1121 {
1122         __be32 addr = 0;
1123         struct in_device *in_dev;
1124         struct net *net = dev_net(dev);
1125
1126         rcu_read_lock();
1127         in_dev = __in_dev_get_rcu(dev);
1128         if (!in_dev)
1129                 goto no_in_dev;
1130
1131         for_primary_ifa(in_dev) {
1132                 if (ifa->ifa_scope > scope)
1133                         continue;
1134                 if (!dst || inet_ifa_match(dst, ifa)) {
1135                         addr = ifa->ifa_local;
1136                         break;
1137                 }
1138                 if (!addr)
1139                         addr = ifa->ifa_local;
1140         } endfor_ifa(in_dev);
1141
1142         if (addr)
1143                 goto out_unlock;
1144 no_in_dev:
1145
1146         /* Not loopback addresses on loopback should be preferred
1147            in this case. It is importnat that lo is the first interface
1148            in dev_base list.
1149          */
1150         for_each_netdev_rcu(net, dev) {
1151                 in_dev = __in_dev_get_rcu(dev);
1152                 if (!in_dev)
1153                         continue;
1154
1155                 for_primary_ifa(in_dev) {
1156                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1157                             ifa->ifa_scope <= scope) {
1158                                 addr = ifa->ifa_local;
1159                                 goto out_unlock;
1160                         }
1161                 } endfor_ifa(in_dev);
1162         }
1163 out_unlock:
1164         rcu_read_unlock();
1165         return addr;
1166 }
1167 EXPORT_SYMBOL(inet_select_addr);
1168
1169 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1170                               __be32 local, int scope)
1171 {
1172         int same = 0;
1173         __be32 addr = 0;
1174
1175         for_ifa(in_dev) {
1176                 if (!addr &&
1177                     (local == ifa->ifa_local || !local) &&
1178                     ifa->ifa_scope <= scope) {
1179                         addr = ifa->ifa_local;
1180                         if (same)
1181                                 break;
1182                 }
1183                 if (!same) {
1184                         same = (!local || inet_ifa_match(local, ifa)) &&
1185                                 (!dst || inet_ifa_match(dst, ifa));
1186                         if (same && addr) {
1187                                 if (local || !dst)
1188                                         break;
1189                                 /* Is the selected addr into dst subnet? */
1190                                 if (inet_ifa_match(addr, ifa))
1191                                         break;
1192                                 /* No, then can we use new local src? */
1193                                 if (ifa->ifa_scope <= scope) {
1194                                         addr = ifa->ifa_local;
1195                                         break;
1196                                 }
1197                                 /* search for large dst subnet for addr */
1198                                 same = 0;
1199                         }
1200                 }
1201         } endfor_ifa(in_dev);
1202
1203         return same ? addr : 0;
1204 }
1205
1206 /*
1207  * Confirm that local IP address exists using wildcards:
1208  * - in_dev: only on this interface, 0=any interface
1209  * - dst: only in the same subnet as dst, 0=any dst
1210  * - local: address, 0=autoselect the local address
1211  * - scope: maximum allowed scope value for the local address
1212  */
1213 __be32 inet_confirm_addr(struct in_device *in_dev,
1214                          __be32 dst, __be32 local, int scope)
1215 {
1216         __be32 addr = 0;
1217         struct net_device *dev;
1218         struct net *net;
1219
1220         if (scope != RT_SCOPE_LINK)
1221                 return confirm_addr_indev(in_dev, dst, local, scope);
1222
1223         net = dev_net(in_dev->dev);
1224         rcu_read_lock();
1225         for_each_netdev_rcu(net, dev) {
1226                 in_dev = __in_dev_get_rcu(dev);
1227                 if (in_dev) {
1228                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1229                         if (addr)
1230                                 break;
1231                 }
1232         }
1233         rcu_read_unlock();
1234
1235         return addr;
1236 }
1237 EXPORT_SYMBOL(inet_confirm_addr);
1238
1239 /*
1240  *      Device notifier
1241  */
1242
1243 int register_inetaddr_notifier(struct notifier_block *nb)
1244 {
1245         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1246 }
1247 EXPORT_SYMBOL(register_inetaddr_notifier);
1248
1249 int unregister_inetaddr_notifier(struct notifier_block *nb)
1250 {
1251         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1252 }
1253 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1254
1255 /* Rename ifa_labels for a device name change. Make some effort to preserve
1256  * existing alias numbering and to create unique labels if possible.
1257 */
1258 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1259 {
1260         struct in_ifaddr *ifa;
1261         int named = 0;
1262
1263         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1264                 char old[IFNAMSIZ], *dot;
1265
1266                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1267                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1268                 if (named++ == 0)
1269                         goto skip;
1270                 dot = strchr(old, ':');
1271                 if (dot == NULL) {
1272                         sprintf(old, ":%d", named);
1273                         dot = old;
1274                 }
1275                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1276                         strcat(ifa->ifa_label, dot);
1277                 else
1278                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1279 skip:
1280                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1281         }
1282 }
1283
/* Tell whether @mtu is large enough for IPv4 to run on a device (>= 68). */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int smallest_ok = 68;

	return !(mtu < smallest_ok);
}
1288
1289 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1290                                         struct in_device *in_dev)
1291
1292 {
1293         struct in_ifaddr *ifa;
1294
1295         for (ifa = in_dev->ifa_list; ifa;
1296              ifa = ifa->ifa_next) {
1297                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1298                          ifa->ifa_local, dev,
1299                          ifa->ifa_local, NULL,
1300                          dev->dev_addr, NULL);
1301         }
1302 }
1303
1304 /* Called only under RTNL semaphore */
1305
1306 static int inetdev_event(struct notifier_block *this, unsigned long event,
1307                          void *ptr)
1308 {
1309         struct net_device *dev = ptr;
1310         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1311
1312         ASSERT_RTNL();
1313
1314         if (!in_dev) {
1315                 if (event == NETDEV_REGISTER) {
1316                         in_dev = inetdev_init(dev);
1317                         if (!in_dev)
1318                                 return notifier_from_errno(-ENOMEM);
1319                         if (dev->flags & IFF_LOOPBACK) {
1320                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1321                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1322                         }
1323                 } else if (event == NETDEV_CHANGEMTU) {
1324                         /* Re-enabling IP */
1325                         if (inetdev_valid_mtu(dev->mtu))
1326                                 in_dev = inetdev_init(dev);
1327                 }
1328                 goto out;
1329         }
1330
1331         switch (event) {
1332         case NETDEV_REGISTER:
1333                 pr_debug("%s: bug\n", __func__);
1334                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1335                 break;
1336         case NETDEV_UP:
1337                 if (!inetdev_valid_mtu(dev->mtu))
1338                         break;
1339                 if (dev->flags & IFF_LOOPBACK) {
1340                         struct in_ifaddr *ifa = inet_alloc_ifa();
1341
1342                         if (ifa) {
1343                                 INIT_HLIST_NODE(&ifa->hash);
1344                                 ifa->ifa_local =
1345                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1346                                 ifa->ifa_prefixlen = 8;
1347                                 ifa->ifa_mask = inet_make_mask(8);
1348                                 in_dev_hold(in_dev);
1349                                 ifa->ifa_dev = in_dev;
1350                                 ifa->ifa_scope = RT_SCOPE_HOST;
1351                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1352                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1353                                                  INFINITY_LIFE_TIME);
1354                                 inet_insert_ifa(ifa);
1355                         }
1356                 }
1357                 ip_mc_up(in_dev);
1358                 /* fall through */
1359         case NETDEV_CHANGEADDR:
1360                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1361                         break;
1362                 /* fall through */
1363         case NETDEV_NOTIFY_PEERS:
1364                 /* Send gratuitous ARP to notify of link change */
1365                 inetdev_send_gratuitous_arp(dev, in_dev);
1366                 break;
1367         case NETDEV_DOWN:
1368                 ip_mc_down(in_dev);
1369                 break;
1370         case NETDEV_PRE_TYPE_CHANGE:
1371                 ip_mc_unmap(in_dev);
1372                 break;
1373         case NETDEV_POST_TYPE_CHANGE:
1374                 ip_mc_remap(in_dev);
1375                 break;
1376         case NETDEV_CHANGEMTU:
1377                 if (inetdev_valid_mtu(dev->mtu))
1378                         break;
1379                 /* disable IP when MTU is not enough */
1380         case NETDEV_UNREGISTER:
1381                 inetdev_destroy(in_dev);
1382                 break;
1383         case NETDEV_CHANGENAME:
1384                 /* Do not notify about label change, this event is
1385                  * not interesting to applications using netlink.
1386                  */
1387                 inetdev_changename(dev, in_dev);
1388
1389                 devinet_sysctl_unregister(in_dev);
1390                 devinet_sysctl_register(in_dev);
1391                 break;
1392         }
1393 out:
1394         return NOTIFY_DONE;
1395 }
1396
1397 static struct notifier_block ip_netdev_notifier = {
1398         .notifier_call = inetdev_event,
1399 };
1400
1401 static size_t inet_nlmsg_size(void)
1402 {
1403         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1404                + nla_total_size(4) /* IFA_ADDRESS */
1405                + nla_total_size(4) /* IFA_LOCAL */
1406                + nla_total_size(4) /* IFA_BROADCAST */
1407                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1408 }
1409
1410 static inline u32 cstamp_delta(unsigned long cstamp)
1411 {
1412         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1413 }
1414
1415 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1416                          unsigned long tstamp, u32 preferred, u32 valid)
1417 {
1418         struct ifa_cacheinfo ci;
1419
1420         ci.cstamp = cstamp_delta(cstamp);
1421         ci.tstamp = cstamp_delta(tstamp);
1422         ci.ifa_prefered = preferred;
1423         ci.ifa_valid = valid;
1424
1425         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1426 }
1427
1428 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1429                             u32 portid, u32 seq, int event, unsigned int flags)
1430 {
1431         struct ifaddrmsg *ifm;
1432         struct nlmsghdr  *nlh;
1433         u32 preferred, valid;
1434
1435         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1436         if (nlh == NULL)
1437                 return -EMSGSIZE;
1438
1439         ifm = nlmsg_data(nlh);
1440         ifm->ifa_family = AF_INET;
1441         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1442         ifm->ifa_flags = ifa->ifa_flags;
1443         ifm->ifa_scope = ifa->ifa_scope;
1444         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1445
1446         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1447                 preferred = ifa->ifa_preferred_lft;
1448                 valid = ifa->ifa_valid_lft;
1449                 if (preferred != INFINITY_LIFE_TIME) {
1450                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1451
1452                         if (preferred > tval)
1453                                 preferred -= tval;
1454                         else
1455                                 preferred = 0;
1456                         if (valid != INFINITY_LIFE_TIME) {
1457                                 if (valid > tval)
1458                                         valid -= tval;
1459                                 else
1460                                         valid = 0;
1461                         }
1462                 }
1463         } else {
1464                 preferred = INFINITY_LIFE_TIME;
1465                 valid = INFINITY_LIFE_TIME;
1466         }
1467         if ((ifa->ifa_address &&
1468              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1469             (ifa->ifa_local &&
1470              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1471             (ifa->ifa_broadcast &&
1472              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1473             (ifa->ifa_label[0] &&
1474              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1475             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1476                           preferred, valid))
1477                 goto nla_put_failure;
1478
1479         return nlmsg_end(skb, nlh);
1480
1481 nla_put_failure:
1482         nlmsg_cancel(skb, nlh);
1483         return -EMSGSIZE;
1484 }
1485
1486 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1487 {
1488         struct net *net = sock_net(skb->sk);
1489         int h, s_h;
1490         int idx, s_idx;
1491         int ip_idx, s_ip_idx;
1492         struct net_device *dev;
1493         struct in_device *in_dev;
1494         struct in_ifaddr *ifa;
1495         struct hlist_head *head;
1496         struct hlist_node *node;
1497
1498         s_h = cb->args[0];
1499         s_idx = idx = cb->args[1];
1500         s_ip_idx = ip_idx = cb->args[2];
1501
1502         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1503                 idx = 0;
1504                 head = &net->dev_index_head[h];
1505                 rcu_read_lock();
1506                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1507                         if (idx < s_idx)
1508                                 goto cont;
1509                         if (h > s_h || idx > s_idx)
1510                                 s_ip_idx = 0;
1511                         in_dev = __in_dev_get_rcu(dev);
1512                         if (!in_dev)
1513                                 goto cont;
1514
1515                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1516                              ifa = ifa->ifa_next, ip_idx++) {
1517                                 if (ip_idx < s_ip_idx)
1518                                         continue;
1519                                 if (inet_fill_ifaddr(skb, ifa,
1520                                              NETLINK_CB(cb->skb).portid,
1521                                              cb->nlh->nlmsg_seq,
1522                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1523                                         rcu_read_unlock();
1524                                         goto done;
1525                                 }
1526                         }
1527 cont:
1528                         idx++;
1529                 }
1530                 rcu_read_unlock();
1531         }
1532
1533 done:
1534         cb->args[0] = h;
1535         cb->args[1] = idx;
1536         cb->args[2] = ip_idx;
1537
1538         return skb->len;
1539 }
1540
1541 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1542                       u32 portid)
1543 {
1544         struct sk_buff *skb;
1545         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1546         int err = -ENOBUFS;
1547         struct net *net;
1548
1549         net = dev_net(ifa->ifa_dev->dev);
1550         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1551         if (skb == NULL)
1552                 goto errout;
1553
1554         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1555         if (err < 0) {
1556                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1557                 WARN_ON(err == -EMSGSIZE);
1558                 kfree_skb(skb);
1559                 goto errout;
1560         }
1561         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1562         return;
1563 errout:
1564         if (err < 0)
1565                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1566 }
1567
1568 static size_t inet_get_link_af_size(const struct net_device *dev)
1569 {
1570         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1571
1572         if (!in_dev)
1573                 return 0;
1574
1575         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1576 }
1577
1578 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1579 {
1580         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1581         struct nlattr *nla;
1582         int i;
1583
1584         if (!in_dev)
1585                 return -ENODATA;
1586
1587         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1588         if (nla == NULL)
1589                 return -EMSGSIZE;
1590
1591         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1592                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1593
1594         return 0;
1595 }
1596
1597 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1598         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1599 };
1600
1601 static int inet_validate_link_af(const struct net_device *dev,
1602                                  const struct nlattr *nla)
1603 {
1604         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1605         int err, rem;
1606
1607         if (dev && !__in_dev_get_rtnl(dev))
1608                 return -EAFNOSUPPORT;
1609
1610         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1611         if (err < 0)
1612                 return err;
1613
1614         if (tb[IFLA_INET_CONF]) {
1615                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1616                         int cfgid = nla_type(a);
1617
1618                         if (nla_len(a) < 4)
1619                                 return -EINVAL;
1620
1621                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1622                                 return -EINVAL;
1623                 }
1624         }
1625
1626         return 0;
1627 }
1628
1629 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1630 {
1631         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1632         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1633         int rem;
1634
1635         if (!in_dev)
1636                 return -EAFNOSUPPORT;
1637
1638         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1639                 BUG();
1640
1641         if (tb[IFLA_INET_CONF]) {
1642                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1643                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1644         }
1645
1646         return 0;
1647 }
1648
1649 static int inet_netconf_msgsize_devconf(int type)
1650 {
1651         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1652                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1653
1654         /* type -1 is used for ALL */
1655         if (type == -1 || type == NETCONFA_FORWARDING)
1656                 size += nla_total_size(4);
1657         if (type == -1 || type == NETCONFA_RP_FILTER)
1658                 size += nla_total_size(4);
1659         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1660                 size += nla_total_size(4);
1661
1662         return size;
1663 }
1664
1665 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1666                                      struct ipv4_devconf *devconf, u32 portid,
1667                                      u32 seq, int event, unsigned int flags,
1668                                      int type)
1669 {
1670         struct nlmsghdr  *nlh;
1671         struct netconfmsg *ncm;
1672
1673         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1674                         flags);
1675         if (nlh == NULL)
1676                 return -EMSGSIZE;
1677
1678         ncm = nlmsg_data(nlh);
1679         ncm->ncm_family = AF_INET;
1680
1681         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1682                 goto nla_put_failure;
1683
1684         /* type -1 is used for ALL */
1685         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1686             nla_put_s32(skb, NETCONFA_FORWARDING,
1687                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1688                 goto nla_put_failure;
1689         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1690             nla_put_s32(skb, NETCONFA_RP_FILTER,
1691                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1692                 goto nla_put_failure;
1693         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1694             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1695                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1696                 goto nla_put_failure;
1697
1698         return nlmsg_end(skb, nlh);
1699
1700 nla_put_failure:
1701         nlmsg_cancel(skb, nlh);
1702         return -EMSGSIZE;
1703 }
1704
1705 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1706                                  struct ipv4_devconf *devconf)
1707 {
1708         struct sk_buff *skb;
1709         int err = -ENOBUFS;
1710
1711         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1712         if (skb == NULL)
1713                 goto errout;
1714
1715         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1716                                         RTM_NEWNETCONF, 0, type);
1717         if (err < 0) {
1718                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1719                 WARN_ON(err == -EMSGSIZE);
1720                 kfree_skb(skb);
1721                 goto errout;
1722         }
1723         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1724         return;
1725 errout:
1726         if (err < 0)
1727                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1728 }
1729
1730 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1731         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1732         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1733         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1734 };
1735
1736 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1737                                     struct nlmsghdr *nlh,
1738                                     void *arg)
1739 {
1740         struct net *net = sock_net(in_skb->sk);
1741         struct nlattr *tb[NETCONFA_MAX+1];
1742         struct netconfmsg *ncm;
1743         struct sk_buff *skb;
1744         struct ipv4_devconf *devconf;
1745         struct in_device *in_dev;
1746         struct net_device *dev;
1747         int ifindex;
1748         int err;
1749
1750         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1751                           devconf_ipv4_policy);
1752         if (err < 0)
1753                 goto errout;
1754
1755         err = EINVAL;
1756         if (!tb[NETCONFA_IFINDEX])
1757                 goto errout;
1758
1759         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1760         switch (ifindex) {
1761         case NETCONFA_IFINDEX_ALL:
1762                 devconf = net->ipv4.devconf_all;
1763                 break;
1764         case NETCONFA_IFINDEX_DEFAULT:
1765                 devconf = net->ipv4.devconf_dflt;
1766                 break;
1767         default:
1768                 dev = __dev_get_by_index(net, ifindex);
1769                 if (dev == NULL)
1770                         goto errout;
1771                 in_dev = __in_dev_get_rtnl(dev);
1772                 if (in_dev == NULL)
1773                         goto errout;
1774                 devconf = &in_dev->cnf;
1775                 break;
1776         }
1777
1778         err = -ENOBUFS;
1779         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1780         if (skb == NULL)
1781                 goto errout;
1782
1783         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1784                                         NETLINK_CB(in_skb).portid,
1785                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1786                                         -1);
1787         if (err < 0) {
1788                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1789                 WARN_ON(err == -EMSGSIZE);
1790                 kfree_skb(skb);
1791                 goto errout;
1792         }
1793         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1794 errout:
1795         return err;
1796 }
1797
1798 #ifdef CONFIG_SYSCTL
1799
1800 static void devinet_copy_dflt_conf(struct net *net, int i)
1801 {
1802         struct net_device *dev;
1803
1804         rcu_read_lock();
1805         for_each_netdev_rcu(net, dev) {
1806                 struct in_device *in_dev;
1807
1808                 in_dev = __in_dev_get_rcu(dev);
1809                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1810                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1811         }
1812         rcu_read_unlock();
1813 }
1814
1815 /* called with RTNL locked */
1816 static void inet_forward_change(struct net *net)
1817 {
1818         struct net_device *dev;
1819         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1820
1821         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1822         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1823         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1824                                     NETCONFA_IFINDEX_ALL,
1825                                     net->ipv4.devconf_all);
1826         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1827                                     NETCONFA_IFINDEX_DEFAULT,
1828                                     net->ipv4.devconf_dflt);
1829
1830         for_each_netdev(net, dev) {
1831                 struct in_device *in_dev;
1832                 if (on)
1833                         dev_disable_lro(dev);
1834                 rcu_read_lock();
1835                 in_dev = __in_dev_get_rcu(dev);
1836                 if (in_dev) {
1837                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1838                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1839                                                     dev->ifindex, &in_dev->cnf);
1840                 }
1841                 rcu_read_unlock();
1842         }
1843 }
1844
1845 static int devinet_conf_proc(ctl_table *ctl, int write,
1846                              void __user *buffer,
1847                              size_t *lenp, loff_t *ppos)
1848 {
1849         int old_value = *(int *)ctl->data;
1850         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1851         int new_value = *(int *)ctl->data;
1852
1853         if (write) {
1854                 struct ipv4_devconf *cnf = ctl->extra1;
1855                 struct net *net = ctl->extra2;
1856                 int i = (int *)ctl->data - cnf->data;
1857
1858                 set_bit(i, cnf->state);
1859
1860                 if (cnf == net->ipv4.devconf_dflt)
1861                         devinet_copy_dflt_conf(net, i);
1862                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1863                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1864                         if ((new_value == 0) && (old_value != 0))
1865                                 rt_cache_flush(net);
1866                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1867                     new_value != old_value) {
1868                         int ifindex;
1869
1870                         if (cnf == net->ipv4.devconf_dflt)
1871                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1872                         else if (cnf == net->ipv4.devconf_all)
1873                                 ifindex = NETCONFA_IFINDEX_ALL;
1874                         else {
1875                                 struct in_device *idev =
1876                                         container_of(cnf, struct in_device,
1877                                                      cnf);
1878                                 ifindex = idev->dev->ifindex;
1879                         }
1880                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1881                                                     ifindex, cnf);
1882                 }
1883         }
1884
1885         return ret;
1886 }
1887
1888 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1889                                   void __user *buffer,
1890                                   size_t *lenp, loff_t *ppos)
1891 {
1892         int *valp = ctl->data;
1893         int val = *valp;
1894         loff_t pos = *ppos;
1895         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1896
1897         if (write && *valp != val) {
1898                 struct net *net = ctl->extra2;
1899
1900                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1901                         if (!rtnl_trylock()) {
1902                                 /* Restore the original values before restarting */
1903                                 *valp = val;
1904                                 *ppos = pos;
1905                                 return restart_syscall();
1906                         }
1907                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1908                                 inet_forward_change(net);
1909                         } else {
1910                                 struct ipv4_devconf *cnf = ctl->extra1;
1911                                 struct in_device *idev =
1912                                         container_of(cnf, struct in_device, cnf);
1913                                 if (*valp)
1914                                         dev_disable_lro(idev->dev);
1915                                 inet_netconf_notify_devconf(net,
1916                                                             NETCONFA_FORWARDING,
1917                                                             idev->dev->ifindex,
1918                                                             cnf);
1919                         }
1920                         rtnl_unlock();
1921                         rt_cache_flush(net);
1922                 } else
1923                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1924                                                     NETCONFA_IFINDEX_DEFAULT,
1925                                                     net->ipv4.devconf_dflt);
1926         }
1927
1928         return ret;
1929 }
1930
1931 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1932                                 void __user *buffer,
1933                                 size_t *lenp, loff_t *ppos)
1934 {
1935         int *valp = ctl->data;
1936         int val = *valp;
1937         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1938         struct net *net = ctl->extra2;
1939
1940         if (write && *valp != val)
1941                 rt_cache_flush(net);
1942
1943         return ret;
1944 }
1945
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of the global
 * ipv4_devconf and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
1968
1969 static struct devinet_sysctl_table {
1970         struct ctl_table_header *sysctl_header;
1971         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1972 } devinet_sysctl = {
1973         .devinet_vars = {
1974                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1975                                              devinet_sysctl_forward),
1976                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1977
1978                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1979                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1980                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1981                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1982                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1983                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1984                                         "accept_source_route"),
1985                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1986                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1987                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1988                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1989                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1990                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1991                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1992                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1993                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1994                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1995                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1996                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1997                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1998
1999                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2000                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2001                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2002                                               "force_igmp_version"),
2003                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2004                                               "promote_secondaries"),
2005                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2006                                               "route_localnet"),
2007         },
2008 };
2009
2010 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2011                                         struct ipv4_devconf *p)
2012 {
2013         int i;
2014         struct devinet_sysctl_table *t;
2015         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2016
2017         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2018         if (!t)
2019                 goto out;
2020
2021         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2022                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2023                 t->devinet_vars[i].extra1 = p;
2024                 t->devinet_vars[i].extra2 = net;
2025         }
2026
2027         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2028
2029         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2030         if (!t->sysctl_header)
2031                 goto free;
2032
2033         p->sysctl = t;
2034         return 0;
2035
2036 free:
2037         kfree(t);
2038 out:
2039         return -ENOBUFS;
2040 }
2041
2042 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2043 {
2044         struct devinet_sysctl_table *t = cnf->sysctl;
2045
2046         if (t == NULL)
2047                 return;
2048
2049         cnf->sysctl = NULL;
2050         unregister_net_sysctl_table(t->sysctl_header);
2051         kfree(t);
2052 }
2053
2054 static void devinet_sysctl_register(struct in_device *idev)
2055 {
2056         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2057         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2058                                         &idev->cnf);
2059 }
2060
2061 static void devinet_sysctl_unregister(struct in_device *idev)
2062 {
2063         __devinet_sysctl_unregister(&idev->cnf);
2064         neigh_sysctl_unregister(idev->arp_parms);
2065 }
2066
2067 static struct ctl_table ctl_forward_entry[] = {
2068         {
2069                 .procname       = "ip_forward",
2070                 .data           = &ipv4_devconf.data[
2071                                         IPV4_DEVCONF_FORWARDING - 1],
2072                 .maxlen         = sizeof(int),
2073                 .mode           = 0644,
2074                 .proc_handler   = devinet_sysctl_forward,
2075                 .extra1         = &ipv4_devconf,
2076                 .extra2         = &init_net,
2077         },
2078         { },
2079 };
2080 #endif
2081
2082 static __net_init int devinet_init_net(struct net *net)
2083 {
2084         int err;
2085         struct ipv4_devconf *all, *dflt;
2086 #ifdef CONFIG_SYSCTL
2087         struct ctl_table *tbl = ctl_forward_entry;
2088         struct ctl_table_header *forw_hdr;
2089 #endif
2090
2091         err = -ENOMEM;
2092         all = &ipv4_devconf;
2093         dflt = &ipv4_devconf_dflt;
2094
2095         if (!net_eq(net, &init_net)) {
2096                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2097                 if (all == NULL)
2098                         goto err_alloc_all;
2099
2100                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2101                 if (dflt == NULL)
2102                         goto err_alloc_dflt;
2103
2104 #ifdef CONFIG_SYSCTL
2105                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2106                 if (tbl == NULL)
2107                         goto err_alloc_ctl;
2108
2109                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2110                 tbl[0].extra1 = all;
2111                 tbl[0].extra2 = net;
2112 #endif
2113         }
2114
2115 #ifdef CONFIG_SYSCTL
2116         err = __devinet_sysctl_register(net, "all", all);
2117         if (err < 0)
2118                 goto err_reg_all;
2119
2120         err = __devinet_sysctl_register(net, "default", dflt);
2121         if (err < 0)
2122                 goto err_reg_dflt;
2123
2124         err = -ENOMEM;
2125         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2126         if (forw_hdr == NULL)
2127                 goto err_reg_ctl;
2128         net->ipv4.forw_hdr = forw_hdr;
2129 #endif
2130
2131         net->ipv4.devconf_all = all;
2132         net->ipv4.devconf_dflt = dflt;
2133         return 0;
2134
2135 #ifdef CONFIG_SYSCTL
2136 err_reg_ctl:
2137         __devinet_sysctl_unregister(dflt);
2138 err_reg_dflt:
2139         __devinet_sysctl_unregister(all);
2140 err_reg_all:
2141         if (tbl != ctl_forward_entry)
2142                 kfree(tbl);
2143 err_alloc_ctl:
2144 #endif
2145         if (dflt != &ipv4_devconf_dflt)
2146                 kfree(dflt);
2147 err_alloc_dflt:
2148         if (all != &ipv4_devconf)
2149                 kfree(all);
2150 err_alloc_all:
2151         return err;
2152 }
2153
2154 static __net_exit void devinet_exit_net(struct net *net)
2155 {
2156 #ifdef CONFIG_SYSCTL
2157         struct ctl_table *tbl;
2158
2159         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2160         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2161         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2162         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2163         kfree(tbl);
2164 #endif
2165         kfree(net->ipv4.devconf_dflt);
2166         kfree(net->ipv4.devconf_all);
2167 }
2168
2169 static __net_initdata struct pernet_operations devinet_ops = {
2170         .init = devinet_init_net,
2171         .exit = devinet_exit_net,
2172 };
2173
2174 static struct rtnl_af_ops inet_af_ops = {
2175         .family           = AF_INET,
2176         .fill_link_af     = inet_fill_link_af,
2177         .get_link_af_size = inet_get_link_af_size,
2178         .validate_link_af = inet_validate_link_af,
2179         .set_link_af      = inet_set_link_af,
2180 };
2181
2182 void __init devinet_init(void)
2183 {
2184         int i;
2185
2186         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2187                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2188
2189         register_pernet_subsys(&devinet_ops);
2190
2191         register_gifconf(PF_INET, inet_gifconf);
2192         register_netdevice_notifier(&ip_netdev_notifier);
2193
2194         schedule_delayed_work(&check_lifetime_work, 0);
2195
2196         rtnl_af_register(&inet_af_ops);
2197
2198         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2199         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2200         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2201         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2202                       NULL, NULL);
2203 }
2204