neigh: restore old behaviour of default parms values
[cascardo/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
/*
 * Built-in IPv4 per-device configuration values.  Each slot is indexed by
 * the attribute id minus one; attributes not listed here start out as zero.
 * NOTE(review): the users of this object lie outside this part of the file --
 * presumably it backs the "all" settings; confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
        },
};
80
/*
 * Built-in "default" IPv4 per-device configuration values, indexed by the
 * attribute id minus one.  Compared with ipv4_devconf, this table also
 * enables ACCEPT_SOURCE_ROUTE.
 * NOTE(review): inetdev_init() copies the namespace's devconf_dflt into each
 * new in_device; presumably that pointer refers to (a copy of) this table --
 * the wiring is outside this part of the file; confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
                [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
                [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
        },
};
92
/*
 * Access attribute @attr in the default devconf of network namespace @net.
 * @net is parenthesised so that any pointer-valued expression (not only a
 * plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
/*
 * Netlink attribute policy for the IFA_* attributes of IPv4 address
 * messages; handed to nlmsg_parse() by inet_rtm_deladdr() and
 * rtm_to_ifaddr().
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        /* Label is bounded so that it fits an IFNAMSIZ buffer with its NUL. */
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
        [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
};
103
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

/* Hash of configured addresses; bucket chosen by inet_addr_hash(net, ifa_local). */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Taken around insertions into and removals from inet_addr_lst. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
109
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         spin_lock(&inet_addr_hash_lock);
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123         spin_unlock(&inet_addr_hash_lock);
124 }
125
126 static void inet_hash_remove(struct in_ifaddr *ifa)
127 {
128         spin_lock(&inet_addr_hash_lock);
129         hlist_del_init_rcu(&ifa->hash);
130         spin_unlock(&inet_addr_hash_lock);
131 }
132
133 /**
134  * __ip_dev_find - find the first device with a given source address.
135  * @net: the net namespace
136  * @addr: the source address
137  * @devref: if true, take a reference on the found device
138  *
139  * If a caller uses devref=false, it should be protected by RCU, or RTNL
140  */
141 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
142 {
143         u32 hash = inet_addr_hash(net, addr);
144         struct net_device *result = NULL;
145         struct in_ifaddr *ifa;
146
147         rcu_read_lock();
148         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
149                 if (ifa->ifa_local == addr) {
150                         struct net_device *dev = ifa->ifa_dev->dev;
151
152                         if (!net_eq(dev_net(dev), net))
153                                 continue;
154                         result = dev;
155                         break;
156                 }
157         }
158         if (!result) {
159                 struct flowi4 fl4 = { .daddr = addr };
160                 struct fib_result res = { 0 };
161                 struct fib_table *local;
162
163                 /* Fallback to FIB local table so that communication
164                  * over loopback subnets work.
165                  */
166                 local = fib_get_table(net, RT_TABLE_LOCAL);
167                 if (local &&
168                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
169                     res.type == RTN_LOCAL)
170                         result = FIB_RES_DEV(res);
171         }
172         if (result && devref)
173                 dev_hold(result);
174         rcu_read_unlock();
175         return result;
176 }
177 EXPORT_SYMBOL(__ip_dev_find);
178
/* Forward declaration; called below with RTM_NEWADDR / RTM_DELADDR events. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chain invoked with NETDEV_UP / NETDEV_DOWN and the affected ifaddr. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy);
#ifdef CONFIG_SYSCTL
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are empty stubs. */
static void devinet_sysctl_register(struct in_device *idev)
{
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
195
196 /* Locks all the inet devices. */
197
198 static struct in_ifaddr *inet_alloc_ifa(void)
199 {
200         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
201 }
202
203 static void inet_rcu_free_ifa(struct rcu_head *head)
204 {
205         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
206         if (ifa->ifa_dev)
207                 in_dev_put(ifa->ifa_dev);
208         kfree(ifa);
209 }
210
/*
 * Free @ifa via call_rcu(): the actual release is done by
 * inet_rcu_free_ifa(), which also drops the ifa's in_device reference.
 */
static void inet_free_ifa(struct in_ifaddr *ifa)
{
        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
215
216 void in_dev_finish_destroy(struct in_device *idev)
217 {
218         struct net_device *dev = idev->dev;
219
220         WARN_ON(idev->ifa_list);
221         WARN_ON(idev->mc_list);
222         kfree(rcu_dereference_protected(idev->mc_hash, 1));
223 #ifdef NET_REFCNT_DEBUG
224         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
225 #endif
226         dev_put(dev);
227         if (!idev->dead)
228                 pr_err("Freeing alive in_device %p\n", idev);
229         else
230                 kfree(idev);
231 }
232 EXPORT_SYMBOL(in_dev_finish_destroy);
233
234 static struct in_device *inetdev_init(struct net_device *dev)
235 {
236         struct in_device *in_dev;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         devinet_sysctl_register(in_dev);
258         ip_mc_init_dev(in_dev);
259         if (dev->flags & IFF_UP)
260                 ip_mc_up(in_dev);
261
262         /* we can receive as soon as ip_ptr is set -- do this last */
263         rcu_assign_pointer(dev->ip_ptr, in_dev);
264 out:
265         return in_dev;
266 out_kfree:
267         kfree(in_dev);
268         in_dev = NULL;
269         goto out;
270 }
271
272 static void in_dev_rcu_put(struct rcu_head *head)
273 {
274         struct in_device *idev = container_of(head, struct in_device, rcu_head);
275         in_dev_put(idev);
276 }
277
278 static void inetdev_destroy(struct in_device *in_dev)
279 {
280         struct in_ifaddr *ifa;
281         struct net_device *dev;
282
283         ASSERT_RTNL();
284
285         dev = in_dev->dev;
286
287         in_dev->dead = 1;
288
289         ip_mc_destroy_dev(in_dev);
290
291         while ((ifa = in_dev->ifa_list) != NULL) {
292                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
293                 inet_free_ifa(ifa);
294         }
295
296         RCU_INIT_POINTER(dev->ip_ptr, NULL);
297
298         devinet_sysctl_unregister(in_dev);
299         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
300         arp_ifdown(dev);
301
302         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
303 }
304
305 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
306 {
307         rcu_read_lock();
308         for_primary_ifa(in_dev) {
309                 if (inet_ifa_match(a, ifa)) {
310                         if (!b || inet_ifa_match(b, ifa)) {
311                                 rcu_read_unlock();
312                                 return 1;
313                         }
314                 }
315         } endfor_ifa(in_dev);
316         rcu_read_unlock();
317         return 0;
318 }
319
/*
 * Remove the address referenced by *ifap from in_dev's address list.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link (pointer to the "next" slot) that currently points at the
 *           entry to remove
 * @destroy: non-zero to also free the removed entry via inet_free_ifa()
 * @nlh, @portid: passed through to rtmsg_ifa() for the notifications
 *
 * If the removed address is a primary one, the secondaries that follow it in
 * the list and belong to the same subnet (same mask, matching address) are
 * either deleted as well or, when IN_DEV_PROMOTE_SECONDARIES() is set, the
 * first of them is promoted to primary.  Must be called under RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 portid)
{
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        /* Entry after which a promoted address will be re-inserted. */
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        /* Last entry skipped before the promotion candidate, if any. */
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        /* 1. Deleting primary ifaddr forces deletion all secondaries
         * unless alias promotion is set
         **/

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Skip primaries and secondaries of other subnets. */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* Unlink, announce and free this secondary. */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                /* First matching secondary becomes the new primary. */
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from subnet are changing
         * the primary IP, we must remove all their routes silently
         * and later to add them back with new prefsrc. Do this
         * while all addresses are on the device list.
         */
        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                /* Remember the tail before promote is possibly relinked. */
                struct in_ifaddr *next_sec = promote->ifa_next;

                if (prev_prom) {
                        /* Move promote from its slot to just after last_prim. */
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add the routes of the remaining same-subnet secondaries. */
                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
419
/*
 * Convenience wrapper around __inet_del_ifa() for deletions that do not
 * originate from a netlink request (no nlmsghdr, portid 0).
 */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy)
{
        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
}
425
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
429
430 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
431                              u32 portid)
432 {
433         struct in_device *in_dev = ifa->ifa_dev;
434         struct in_ifaddr *ifa1, **ifap, **last_primary;
435
436         ASSERT_RTNL();
437
438         if (!ifa->ifa_local) {
439                 inet_free_ifa(ifa);
440                 return 0;
441         }
442
443         ifa->ifa_flags &= ~IFA_F_SECONDARY;
444         last_primary = &in_dev->ifa_list;
445
446         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
447              ifap = &ifa1->ifa_next) {
448                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
449                     ifa->ifa_scope <= ifa1->ifa_scope)
450                         last_primary = &ifa1->ifa_next;
451                 if (ifa1->ifa_mask == ifa->ifa_mask &&
452                     inet_ifa_match(ifa1->ifa_address, ifa)) {
453                         if (ifa1->ifa_local == ifa->ifa_local) {
454                                 inet_free_ifa(ifa);
455                                 return -EEXIST;
456                         }
457                         if (ifa1->ifa_scope != ifa->ifa_scope) {
458                                 inet_free_ifa(ifa);
459                                 return -EINVAL;
460                         }
461                         ifa->ifa_flags |= IFA_F_SECONDARY;
462                 }
463         }
464
465         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
466                 net_srandom(ifa->ifa_local);
467                 ifap = last_primary;
468         }
469
470         ifa->ifa_next = *ifap;
471         *ifap = ifa;
472
473         inet_hash_insert(dev_net(in_dev->dev), ifa);
474
475         cancel_delayed_work(&check_lifetime_work);
476         schedule_delayed_work(&check_lifetime_work, 0);
477
478         /* Send message first, then call notifier.
479            Notifier will trigger FIB update, so that
480            listeners of netlink will know about new ifaddr */
481         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
482         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
483
484         return 0;
485 }
486
/*
 * Convenience wrapper around __inet_insert_ifa() for insertions that do not
 * originate from a netlink request (no nlmsghdr, portid 0).
 */
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
        return __inet_insert_ifa(ifa, NULL, 0);
}
491
492 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
493 {
494         struct in_device *in_dev = __in_dev_get_rtnl(dev);
495
496         ASSERT_RTNL();
497
498         if (!in_dev) {
499                 inet_free_ifa(ifa);
500                 return -ENOBUFS;
501         }
502         ipv4_devconf_setall(in_dev);
503         neigh_parms_data_state_setall(in_dev->arp_parms);
504         if (ifa->ifa_dev != in_dev) {
505                 WARN_ON(ifa->ifa_dev);
506                 in_dev_hold(in_dev);
507                 ifa->ifa_dev = in_dev;
508         }
509         if (ipv4_is_loopback(ifa->ifa_local))
510                 ifa->ifa_scope = RT_SCOPE_HOST;
511         return inet_insert_ifa(ifa);
512 }
513
514 /* Caller must hold RCU or RTNL :
515  * We dont take a reference on found in_device
516  */
517 struct in_device *inetdev_by_index(struct net *net, int ifindex)
518 {
519         struct net_device *dev;
520         struct in_device *in_dev = NULL;
521
522         rcu_read_lock();
523         dev = dev_get_by_index_rcu(net, ifindex);
524         if (dev)
525                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
526         rcu_read_unlock();
527         return in_dev;
528 }
529 EXPORT_SYMBOL(inetdev_by_index);
530
531 /* Called only from RTNL semaphored context. No locks. */
532
533 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
534                                     __be32 mask)
535 {
536         ASSERT_RTNL();
537
538         for_primary_ifa(in_dev) {
539                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
540                         return ifa;
541         } endfor_ifa(in_dev);
542         return NULL;
543 }
544
545 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
546 {
547         struct net *net = sock_net(skb->sk);
548         struct nlattr *tb[IFA_MAX+1];
549         struct in_device *in_dev;
550         struct ifaddrmsg *ifm;
551         struct in_ifaddr *ifa, **ifap;
552         int err = -EINVAL;
553
554         ASSERT_RTNL();
555
556         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
557         if (err < 0)
558                 goto errout;
559
560         ifm = nlmsg_data(nlh);
561         in_dev = inetdev_by_index(net, ifm->ifa_index);
562         if (in_dev == NULL) {
563                 err = -ENODEV;
564                 goto errout;
565         }
566
567         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
568              ifap = &ifa->ifa_next) {
569                 if (tb[IFA_LOCAL] &&
570                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
571                         continue;
572
573                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
574                         continue;
575
576                 if (tb[IFA_ADDRESS] &&
577                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
578                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
579                         continue;
580
581                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
582                 return 0;
583         }
584
585         err = -EADDRNOTAVAIL;
586 errout:
587         return err;
588 }
589
/*
 * Lifetime value meaning "never expires": check_lifetime() skips expiry for
 * it, and inet_rtm_newaddr() uses it as the default valid/preferred lifetime.
 */
#define INFINITY_LIFE_TIME      0xFFFFFFFF
591
/*
 * Delayed-work handler that enforces address lifetimes.
 *
 * Every bucket of inet_addr_lst is scanned twice:
 *  - a first pass under rcu_read_lock() only detects whether some address
 *    needs to be deleted or deprecated, and tracks the earliest upcoming
 *    deadline in 'next';
 *  - if a change is needed, a second pass under rtnl_lock() deletes the
 *    addresses whose valid lifetime has elapsed and sets IFA_F_DEPRECATED
 *    (announcing RTM_NEWADDR) on those whose preferred lifetime has elapsed.
 *
 * Addresses flagged IFA_F_PERMANENT are ignored.  The work item re-arms
 * itself at the end.
 */
static void check_lifetime(struct work_struct *work)
{
        unsigned long now, next, next_sec, next_sched;
        struct in_ifaddr *ifa;
        struct hlist_node *n;
        int i;

        now = jiffies;
        /* Latest time at which the next run is wanted; may be pulled earlier below. */
        next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

        for (i = 0; i < IN4_ADDR_HSIZE; i++) {
                bool change_needed = false;

                /* Pass 1: read-only scan. */
                rcu_read_lock();
                hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                /* Valid lifetime over: must be deleted. */
                                change_needed = true;
                        } else if (ifa->ifa_preferred_lft ==
                                   INFINITY_LIFE_TIME) {
                                continue;
                        } else if (age >= ifa->ifa_preferred_lft) {
                                /* Preferred lifetime over: next event is the
                                 * end of the valid lifetime.
                                 */
                                if (time_before(ifa->ifa_tstamp +
                                                ifa->ifa_valid_lft * HZ, next))
                                        next = ifa->ifa_tstamp +
                                               ifa->ifa_valid_lft * HZ;

                                if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
                                        change_needed = true;
                        } else if (time_before(ifa->ifa_tstamp +
                                               ifa->ifa_preferred_lft * HZ,
                                               next)) {
                                /* Still preferred: next event is the end of
                                 * the preferred lifetime.
                                 */
                                next = ifa->ifa_tstamp +
                                       ifa->ifa_preferred_lft * HZ;
                        }
                }
                rcu_read_unlock();
                if (!change_needed)
                        continue;
                /* Pass 2: apply the changes with RTNL held. */
                rtnl_lock();
                hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
                        unsigned long age;

                        if (ifa->ifa_flags & IFA_F_PERMANENT)
                                continue;

                        /* We try to batch several events at once. */
                        age = (now - ifa->ifa_tstamp +
                               ADDRCONF_TIMER_FUZZ_MINUS) / HZ;

                        if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
                            age >= ifa->ifa_valid_lft) {
                                struct in_ifaddr **ifap;

                                /* Locate the list link pointing at ifa, as
                                 * inet_del_ifa() needs it to unlink the entry.
                                 */
                                for (ifap = &ifa->ifa_dev->ifa_list;
                                     *ifap != NULL; ifap = &(*ifap)->ifa_next) {
                                        if (*ifap == ifa) {
                                                inet_del_ifa(ifa->ifa_dev,
                                                             ifap, 1);
                                                break;
                                        }
                                }
                        } else if (ifa->ifa_preferred_lft !=
                                   INFINITY_LIFE_TIME &&
                                   age >= ifa->ifa_preferred_lft &&
                                   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
                                ifa->ifa_flags |= IFA_F_DEPRECATED;
                                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
                        }
                }
                rtnl_unlock();
        }

        next_sec = round_jiffies_up(next);
        next_sched = next;

        /* If rounded timeout is accurate enough, accept it. */
        if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
                next_sched = next_sec;

        now = jiffies;
        /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
        if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
                next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;

        schedule_delayed_work(&check_lifetime_work, next_sched - now);
}
688
689 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
690                              __u32 prefered_lft)
691 {
692         unsigned long timeout;
693
694         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
695
696         timeout = addrconf_timeout_fixup(valid_lft, HZ);
697         if (addrconf_finite_timeout(timeout))
698                 ifa->ifa_valid_lft = timeout;
699         else
700                 ifa->ifa_flags |= IFA_F_PERMANENT;
701
702         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
703         if (addrconf_finite_timeout(timeout)) {
704                 if (timeout == 0)
705                         ifa->ifa_flags |= IFA_F_DEPRECATED;
706                 ifa->ifa_preferred_lft = timeout;
707         }
708         ifa->ifa_tstamp = jiffies;
709         if (!ifa->ifa_cstamp)
710                 ifa->ifa_cstamp = ifa->ifa_tstamp;
711 }
712
713 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
714                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
715 {
716         struct nlattr *tb[IFA_MAX+1];
717         struct in_ifaddr *ifa;
718         struct ifaddrmsg *ifm;
719         struct net_device *dev;
720         struct in_device *in_dev;
721         int err;
722
723         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
724         if (err < 0)
725                 goto errout;
726
727         ifm = nlmsg_data(nlh);
728         err = -EINVAL;
729         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
730                 goto errout;
731
732         dev = __dev_get_by_index(net, ifm->ifa_index);
733         err = -ENODEV;
734         if (dev == NULL)
735                 goto errout;
736
737         in_dev = __in_dev_get_rtnl(dev);
738         err = -ENOBUFS;
739         if (in_dev == NULL)
740                 goto errout;
741
742         ifa = inet_alloc_ifa();
743         if (ifa == NULL)
744                 /*
745                  * A potential indev allocation can be left alive, it stays
746                  * assigned to its device and is destroy with it.
747                  */
748                 goto errout;
749
750         ipv4_devconf_setall(in_dev);
751         neigh_parms_data_state_setall(in_dev->arp_parms);
752         in_dev_hold(in_dev);
753
754         if (tb[IFA_ADDRESS] == NULL)
755                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
756
757         INIT_HLIST_NODE(&ifa->hash);
758         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
759         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
760         ifa->ifa_flags = ifm->ifa_flags;
761         ifa->ifa_scope = ifm->ifa_scope;
762         ifa->ifa_dev = in_dev;
763
764         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
765         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
766
767         if (tb[IFA_BROADCAST])
768                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
769
770         if (tb[IFA_LABEL])
771                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
772         else
773                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
774
775         if (tb[IFA_CACHEINFO]) {
776                 struct ifa_cacheinfo *ci;
777
778                 ci = nla_data(tb[IFA_CACHEINFO]);
779                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
780                         err = -EINVAL;
781                         goto errout_free;
782                 }
783                 *pvalid_lft = ci->ifa_valid;
784                 *pprefered_lft = ci->ifa_prefered;
785         }
786
787         return ifa;
788
789 errout_free:
790         inet_free_ifa(ifa);
791 errout:
792         return ERR_PTR(err);
793 }
794
795 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
796 {
797         struct in_device *in_dev = ifa->ifa_dev;
798         struct in_ifaddr *ifa1, **ifap;
799
800         if (!ifa->ifa_local)
801                 return NULL;
802
803         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
804              ifap = &ifa1->ifa_next) {
805                 if (ifa1->ifa_mask == ifa->ifa_mask &&
806                     inet_ifa_match(ifa1->ifa_address, ifa) &&
807                     ifa1->ifa_local == ifa->ifa_local)
808                         return ifa1;
809         }
810         return NULL;
811 }
812
813 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
814 {
815         struct net *net = sock_net(skb->sk);
816         struct in_ifaddr *ifa;
817         struct in_ifaddr *ifa_existing;
818         __u32 valid_lft = INFINITY_LIFE_TIME;
819         __u32 prefered_lft = INFINITY_LIFE_TIME;
820
821         ASSERT_RTNL();
822
823         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
824         if (IS_ERR(ifa))
825                 return PTR_ERR(ifa);
826
827         ifa_existing = find_matching_ifa(ifa);
828         if (!ifa_existing) {
829                 /* It would be best to check for !NLM_F_CREATE here but
830                  * userspace alreay relies on not having to provide this.
831                  */
832                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
834         } else {
835                 inet_free_ifa(ifa);
836
837                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
838                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
839                         return -EEXIST;
840                 ifa = ifa_existing;
841                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
842                 cancel_delayed_work(&check_lifetime_work);
843                 schedule_delayed_work(&check_lifetime_work, 0);
844                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
845                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
846         }
847         return 0;
848 }
849
850 /*
851  *      Determine a default network mask, based on the IP address.
852  */
853
854 static int inet_abc_len(__be32 addr)
855 {
856         int rc = -1;    /* Something else, probably a multicast. */
857
858         if (ipv4_is_zeronet(addr))
859                 rc = 0;
860         else {
861                 __u32 haddr = ntohl(addr);
862
863                 if (IN_CLASSA(haddr))
864                         rc = 8;
865                 else if (IN_CLASSB(haddr))
866                         rc = 16;
867                 else if (IN_CLASSC(haddr))
868                         rc = 24;
869         }
870
871         return rc;
872 }
873
874
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls.
 *
 * @net: network namespace the request applies to
 * @cmd: one of SIOC{G,S}IFADDR, SIOC{G,S}IFBRDADDR, SIOC{G,S}IFDSTADDR,
 *       SIOC{G,S}IFNETMASK or SIOCSIFFLAGS; anything else is -EINVAL
 * @arg: user pointer to a struct ifreq (read, and for the get commands
 *       also written back)
 *
 * The set commands (including SIOCSIFFLAGS) require CAP_NET_ADMIN in
 * net->user_ns.  Device lookup and every modification run under
 * rtnl_lock().  Returns 0 on success or a negative errno.
 */
875 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
876 {
877         struct ifreq ifr;
878         struct sockaddr_in sin_orig;
879         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
880         struct in_device *in_dev;
881         struct in_ifaddr **ifap = NULL;
882         struct in_ifaddr *ifa = NULL;
883         struct net_device *dev;
884         char *colon;
885         int ret = -EFAULT;
886         int tryaddrmatch = 0;
887
888         /*
889          *      Fetch the caller's info block into kernel space
890          */
891
892         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
893                 goto out;
            /* Force NUL termination of the user-supplied name. */
894         ifr.ifr_name[IFNAMSIZ - 1] = 0;
895
896         /* save original address for comparison */
897         memcpy(&sin_orig, sin, sizeof(*sin));
898
            /* Temporarily cut off an alias suffix ("name:N") so that
             * dev_load() and __dev_get_by_name() below see the bare device
             * name; the ':' is put back once the device has been found.
             */
899         colon = strchr(ifr.ifr_name, ':');
900         if (colon)
901                 *colon = 0;
902
903         dev_load(net, ifr.ifr_name);
904
            /* First pass over cmd: permission and argument checks only,
             * done before taking RTNL.
             */
905         switch (cmd) {
906         case SIOCGIFADDR:       /* Get interface address */
907         case SIOCGIFBRDADDR:    /* Get the broadcast address */
908         case SIOCGIFDSTADDR:    /* Get the destination address */
909         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
910                 /* Note that these ioctls will not sleep,
911                    so that we do not impose a lock.
912                    One day we will be forced to put shlock here (I mean SMP)
913                  */
914                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
915                 memset(sin, 0, sizeof(*sin));
916                 sin->sin_family = AF_INET;
917                 break;
918
919         case SIOCSIFFLAGS:
920                 ret = -EPERM;
921                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
922                         goto out;
923                 break;
924         case SIOCSIFADDR:       /* Set interface address (and family) */
925         case SIOCSIFBRDADDR:    /* Set the broadcast address */
926         case SIOCSIFDSTADDR:    /* Set the destination address */
927         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
928                 ret = -EPERM;
929                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
930                         goto out;
931                 ret = -EINVAL;
932                 if (sin->sin_family != AF_INET)
933                         goto out;
934                 break;
935         default:
936                 ret = -EINVAL;
937                 goto out;
938         }
939
940         rtnl_lock();
941
942         ret = -ENODEV;
943         dev = __dev_get_by_name(net, ifr.ifr_name);
944         if (!dev)
945                 goto done;
946
            /* Restore the full label for the label comparisons below. */
947         if (colon)
948                 *colon = ':';
949
950         in_dev = __in_dev_get_rtnl(dev);
951         if (in_dev) {
952                 if (tryaddrmatch) {
953                         /* Matthias Andree */
954                         /* compare label and address (4.4BSD style) */
955                         /* note: we only do this for a limited set of ioctls
956                            and only if the original address family was AF_INET.
957                            This is checked above. */
958                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
959                              ifap = &ifa->ifa_next) {
960                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
961                                     sin_orig.sin_addr.s_addr ==
962                                                         ifa->ifa_local) {
963                                         break; /* found */
964                                 }
965                         }
966                 }
967                 /* we didn't get a match, maybe the application is
968                    4.3BSD-style and passed in junk so we fall back to
969                    comparing just the label */
970                 if (!ifa) {
971                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
972                              ifap = &ifa->ifa_next)
973                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
974                                         break;
975                 }
976         }
977
            /* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an
             * existing address; every other command needs one.
             */
978         ret = -EADDRNOTAVAIL;
979         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
980                 goto done;
981
982         switch (cmd) {
983         case SIOCGIFADDR:       /* Get interface address */
984                 sin->sin_addr.s_addr = ifa->ifa_local;
985                 goto rarok;
986
987         case SIOCGIFBRDADDR:    /* Get the broadcast address */
988                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
989                 goto rarok;
990
991         case SIOCGIFDSTADDR:    /* Get the destination address */
992                 sin->sin_addr.s_addr = ifa->ifa_address;
993                 goto rarok;
994
995         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
996                 sin->sin_addr.s_addr = ifa->ifa_mask;
997                 goto rarok;
998
999         case SIOCSIFFLAGS:
                    /* With an alias label the flags apply to that one
                     * address: clearing IFF_UP deletes it.  Without a
                     * label the request is passed to dev_change_flags().
                     */
1000                 if (colon) {
1001                         ret = -EADDRNOTAVAIL;
1002                         if (!ifa)
1003                                 break;
1004                         ret = 0;
1005                         if (!(ifr.ifr_flags & IFF_UP))
1006                                 inet_del_ifa(in_dev, ifap, 1);
1007                         break;
1008                 }
1009                 ret = dev_change_flags(dev, ifr.ifr_flags);
1010                 break;
1011
1012         case SIOCSIFADDR:       /* Set interface address (and family) */
1013                 ret = -EINVAL;
1014                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1015                         break;
1016
1017                 if (!ifa) {
1018                         ret = -ENOBUFS;
1019                         ifa = inet_alloc_ifa();
1020                         if (!ifa)
1021                                 break;
1022                         INIT_HLIST_NODE(&ifa->hash);
1023                         if (colon)
1024                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1025                         else
1026                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1027                 } else {
1028                         ret = 0;
1029                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1030                                 break;
                             /* Unlink the entry, edit it below, then
                              * hand it to inet_set_ifa().
                              */
1031                         inet_del_ifa(in_dev, ifap, 0);
1032                         ifa->ifa_broadcast = 0;
1033                         ifa->ifa_scope = 0;
1034                 }
1035
1036                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1037
                     /* Non point-to-point devices get the classful default
                      * mask; point-to-point devices get a /32.
                      */
1038                 if (!(dev->flags & IFF_POINTOPOINT)) {
1039                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1040                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1041                         if ((dev->flags & IFF_BROADCAST) &&
1042                             ifa->ifa_prefixlen < 31)
1043                                 ifa->ifa_broadcast = ifa->ifa_address |
1044                                                      ~ifa->ifa_mask;
1045                 } else {
1046                         ifa->ifa_prefixlen = 32;
1047                         ifa->ifa_mask = inet_make_mask(32);
1048                 }
1049                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1050                 ret = inet_set_ifa(dev, ifa);
1051                 break;
1052
1053         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1054                 ret = 0;
1055                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1056                         inet_del_ifa(in_dev, ifap, 0);
1057                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1058                         inet_insert_ifa(ifa);
1059                 }
1060                 break;
1061
1062         case SIOCSIFDSTADDR:    /* Set the destination address */
1063                 ret = 0;
1064                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1065                         break;
1066                 ret = -EINVAL;
1067                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1068                         break;
1069                 ret = 0;
1070                 inet_del_ifa(in_dev, ifap, 0);
1071                 ifa->ifa_address = sin->sin_addr.s_addr;
1072                 inet_insert_ifa(ifa);
1073                 break;
1074
1075         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1076
1077                 /*
1078                  *      The mask we set must be legal.
1079                  */
1080                 ret = -EINVAL;
1081                 if (bad_mask(sin->sin_addr.s_addr, 0))
1082                         break;
1083                 ret = 0;
1084                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1085                         __be32 old_mask = ifa->ifa_mask;
1086                         inet_del_ifa(in_dev, ifap, 0);
1087                         ifa->ifa_mask = sin->sin_addr.s_addr;
1088                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1089
1090                         /* See if current broadcast address matches
1091                          * with current netmask, then recalculate
1092                          * the broadcast address. Otherwise it's a
1093                          * funny address, so don't touch it since
1094                          * the user seems to know what (s)he's doing...
1095                          */
1096                         if ((dev->flags & IFF_BROADCAST) &&
1097                             (ifa->ifa_prefixlen < 31) &&
1098                             (ifa->ifa_broadcast ==
1099                              (ifa->ifa_local|~old_mask))) {
1100                                 ifa->ifa_broadcast = (ifa->ifa_local |
1101                                                       ~sin->sin_addr.s_addr);
1102                         }
1103                         inet_insert_ifa(ifa);
1104                 }
1105                 break;
1106         }
1107 done:
1108         rtnl_unlock();
1109 out:
1110         return ret;
     /* Success exit of the get commands: RTNL is dropped before the
      * result is copied back to user space.
      */
1111 rarok:
1112         rtnl_unlock();
1113         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1114         goto out;
1115 }
1116
1117 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1118 {
1119         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1120         struct in_ifaddr *ifa;
1121         struct ifreq ifr;
1122         int done = 0;
1123
1124         if (!in_dev)
1125                 goto out;
1126
1127         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1128                 if (!buf) {
1129                         done += sizeof(ifr);
1130                         continue;
1131                 }
1132                 if (len < (int) sizeof(ifr))
1133                         break;
1134                 memset(&ifr, 0, sizeof(struct ifreq));
1135                 strcpy(ifr.ifr_name, ifa->ifa_label);
1136
1137                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1138                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1139                                                                 ifa->ifa_local;
1140
1141                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1142                         done = -EFAULT;
1143                         break;
1144                 }
1145                 buf  += sizeof(struct ifreq);
1146                 len  -= sizeof(struct ifreq);
1147                 done += sizeof(struct ifreq);
1148         }
1149 out:
1150         return done;
1151 }
1152
1153 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1154 {
1155         __be32 addr = 0;
1156         struct in_device *in_dev;
1157         struct net *net = dev_net(dev);
1158
1159         rcu_read_lock();
1160         in_dev = __in_dev_get_rcu(dev);
1161         if (!in_dev)
1162                 goto no_in_dev;
1163
1164         for_primary_ifa(in_dev) {
1165                 if (ifa->ifa_scope > scope)
1166                         continue;
1167                 if (!dst || inet_ifa_match(dst, ifa)) {
1168                         addr = ifa->ifa_local;
1169                         break;
1170                 }
1171                 if (!addr)
1172                         addr = ifa->ifa_local;
1173         } endfor_ifa(in_dev);
1174
1175         if (addr)
1176                 goto out_unlock;
1177 no_in_dev:
1178
1179         /* Not loopback addresses on loopback should be preferred
1180            in this case. It is importnat that lo is the first interface
1181            in dev_base list.
1182          */
1183         for_each_netdev_rcu(net, dev) {
1184                 in_dev = __in_dev_get_rcu(dev);
1185                 if (!in_dev)
1186                         continue;
1187
1188                 for_primary_ifa(in_dev) {
1189                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1190                             ifa->ifa_scope <= scope) {
1191                                 addr = ifa->ifa_local;
1192                                 goto out_unlock;
1193                         }
1194                 } endfor_ifa(in_dev);
1195         }
1196 out_unlock:
1197         rcu_read_unlock();
1198         return addr;
1199 }
1200 EXPORT_SYMBOL(inet_select_addr);
1201
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of @in_dev and tracks two things:
 *   addr - the first address with ifa_scope <= @scope whose ifa_local
 *          equals @local (any address qualifies when @local is 0);
 *   same - set once an address is seen for which inet_ifa_match()
 *          accepts @local and @dst (a zero value always passes).
 *
 * Returns addr when such a "same" address was found, 0 otherwise.
 */
1202 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1203                               __be32 local, int scope)
1204 {
1205         int same = 0;
1206         __be32 addr = 0;
1207
1208         for_ifa(in_dev) {
1209                 if (!addr &&
1210                     (local == ifa->ifa_local || !local) &&
1211                     ifa->ifa_scope <= scope) {
1212                         addr = ifa->ifa_local;
                             /* subnet already confirmed by an earlier entry */
1213                         if (same)
1214                                 break;
1215                 }
1216                 if (!same) {
1217                         same = (!local || inet_ifa_match(local, ifa)) &&
1218                                 (!dst || inet_ifa_match(dst, ifa));
1219                         if (same && addr) {
1220                                 if (local || !dst)
1221                                         break;
1222                                 /* Is the selected addr into dst subnet? */
1223                                 if (inet_ifa_match(addr, ifa))
1224                                         break;
1225                                 /* No, then can we use new local src? */
1226                                 if (ifa->ifa_scope <= scope) {
1227                                         addr = ifa->ifa_local;
1228                                         break;
1229                                 }
1230                                 /* search for large dst subnet for addr */
1231                                 same = 0;
1232                         }
1233                 }
1234         } endfor_ifa(in_dev);
1235
1236         return same ? addr : 0;
1237 }
1238
1239 /*
1240  * Confirm that local IP address exists using wildcards:
1241  * - in_dev: only on this interface, 0=any interface
1242  * - dst: only in the same subnet as dst, 0=any dst
1243  * - local: address, 0=autoselect the local address
1244  * - scope: maximum allowed scope value for the local address
1245  */
1246 __be32 inet_confirm_addr(struct in_device *in_dev,
1247                          __be32 dst, __be32 local, int scope)
1248 {
1249         __be32 addr = 0;
1250         struct net_device *dev;
1251         struct net *net;
1252
1253         if (scope != RT_SCOPE_LINK)
1254                 return confirm_addr_indev(in_dev, dst, local, scope);
1255
1256         net = dev_net(in_dev->dev);
1257         rcu_read_lock();
1258         for_each_netdev_rcu(net, dev) {
1259                 in_dev = __in_dev_get_rcu(dev);
1260                 if (in_dev) {
1261                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1262                         if (addr)
1263                                 break;
1264                 }
1265         }
1266         rcu_read_unlock();
1267
1268         return addr;
1269 }
1270 EXPORT_SYMBOL(inet_confirm_addr);
1271
1272 /*
1273  *      Device notifier
1274  */
1275
1276 int register_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(register_inetaddr_notifier);
1281
1282 int unregister_inetaddr_notifier(struct notifier_block *nb)
1283 {
1284         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1285 }
1286 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1287
1288 /* Rename ifa_labels for a device name change. Make some effort to preserve
1289  * existing alias numbering and to create unique labels if possible.
1290 */
1291 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1292 {
1293         struct in_ifaddr *ifa;
1294         int named = 0;
1295
1296         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1297                 char old[IFNAMSIZ], *dot;
1298
1299                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1300                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1301                 if (named++ == 0)
1302                         goto skip;
1303                 dot = strchr(old, ':');
1304                 if (dot == NULL) {
1305                         sprintf(old, ":%d", named);
1306                         dot = old;
1307                 }
1308                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1309                         strcat(ifa->ifa_label, dot);
1310                 else
1311                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1312 skip:
1313                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1314         }
1315 }
1316
/* An MTU below 68 bytes is treated as too small to run IPv4 on
 * (presumably the RFC 791 minimum - confirm).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1321
1322 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1323                                         struct in_device *in_dev)
1324
1325 {
1326         struct in_ifaddr *ifa;
1327
1328         for (ifa = in_dev->ifa_list; ifa;
1329              ifa = ifa->ifa_next) {
1330                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1331                          ifa->ifa_local, dev,
1332                          ifa->ifa_local, NULL,
1333                          dev->dev_addr, NULL);
1334         }
1335 }
1336
1337 /* Called only under RTNL semaphore */
1338
/*
 * inetdev_event - netdevice notifier callback for IPv4.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   notifier info from which the net_device is extracted
 *
 * Keeps the per-device IPv4 state (struct in_device) in step with the
 * device: creates it on registration, tears it down on unregistration
 * or when the MTU becomes too small, and reacts to up/down, address,
 * type and name changes.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case -ENOMEM is reported through
 * notifier_from_errno().
 */
1339 static int inetdev_event(struct notifier_block *this, unsigned long event,
1340                          void *ptr)
1341 {
1342         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1343         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1344
1345         ASSERT_RTNL();
1346
             /* No IPv4 state yet: only registration, or an MTU change to a
              * valid value, creates it; every other event is ignored.
              */
1347         if (!in_dev) {
1348                 if (event == NETDEV_REGISTER) {
1349                         in_dev = inetdev_init(dev);
1350                         if (!in_dev)
1351                                 return notifier_from_errno(-ENOMEM);
1352                         if (dev->flags & IFF_LOOPBACK) {
1353                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1354                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1355                         }
1356                 } else if (event == NETDEV_CHANGEMTU) {
1357                         /* Re-enabling IP */
1358                         if (inetdev_valid_mtu(dev->mtu))
1359                                 in_dev = inetdev_init(dev);
1360                 }
1361                 goto out;
1362         }
1363
1364         switch (event) {
1365         case NETDEV_REGISTER:
                     /* A registering device is not expected to have IPv4
                      * state already; log it and clear the pointer.
                      */
1366                 pr_debug("%s: bug\n", __func__);
1367                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1368                 break;
1369         case NETDEV_UP:
1370                 if (!inetdev_valid_mtu(dev->mtu))
1371                         break;
                     /* A loopback device coming up gets 127.0.0.1/8
                      * (INADDR_LOOPBACK, host scope) configured.
                      */
1372                 if (dev->flags & IFF_LOOPBACK) {
1373                         struct in_ifaddr *ifa = inet_alloc_ifa();
1374
1375                         if (ifa) {
1376                                 INIT_HLIST_NODE(&ifa->hash);
1377                                 ifa->ifa_local =
1378                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1379                                 ifa->ifa_prefixlen = 8;
1380                                 ifa->ifa_mask = inet_make_mask(8);
1381                                 in_dev_hold(in_dev);
1382                                 ifa->ifa_dev = in_dev;
1383                                 ifa->ifa_scope = RT_SCOPE_HOST;
1384                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1385                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1386                                                  INFINITY_LIFE_TIME);
1387                                 inet_insert_ifa(ifa);
1388                         }
1389                 }
1390                 ip_mc_up(in_dev);
1391                 /* fall through */
1392         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR announce themselves only when
                      * IN_DEV_ARP_NOTIFY() is true for this device.
                      */
1393                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1394                         break;
1395                 /* fall through */
1396         case NETDEV_NOTIFY_PEERS:
1397                 /* Send gratuitous ARP to notify of link change */
1398                 inetdev_send_gratuitous_arp(dev, in_dev);
1399                 break;
1400         case NETDEV_DOWN:
1401                 ip_mc_down(in_dev);
1402                 break;
1403         case NETDEV_PRE_TYPE_CHANGE:
1404                 ip_mc_unmap(in_dev);
1405                 break;
1406         case NETDEV_POST_TYPE_CHANGE:
1407                 ip_mc_remap(in_dev);
1408                 break;
1409         case NETDEV_CHANGEMTU:
1410                 if (inetdev_valid_mtu(dev->mtu))
1411                         break;
1412                 /* disable IP when MTU is not enough */
                     /* fall through */
1413         case NETDEV_UNREGISTER:
1414                 inetdev_destroy(in_dev);
1415                 break;
1416         case NETDEV_CHANGENAME:
1417                 /* Do not notify about label change, this event is
1418                  * not interesting to applications using netlink.
1419                  */
                     /* NOTE(review): inetdev_changename() does send an
                      * RTM_NEWADDR per address via rtmsg_ifa(); the comment
                      * above presumably refers to some other notification
                      * - confirm.
                      */
1420                 inetdev_changename(dev, in_dev);
1421
                     /* Re-register the sysctl entries after the rename. */
1422                 devinet_sysctl_unregister(in_dev);
1423                 devinet_sysctl_register(in_dev);
1424                 break;
1425         }
1426 out:
1427         return NOTIFY_DONE;
1428 }
1429
/* Notifier block whose callback is inetdev_event(). */
1430 static struct notifier_block ip_netdev_notifier = {
1431         .notifier_call = inetdev_event,
1432 };
1433
1434 static size_t inet_nlmsg_size(void)
1435 {
1436         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1437                + nla_total_size(4) /* IFA_ADDRESS */
1438                + nla_total_size(4) /* IFA_LOCAL */
1439                + nla_total_size(4) /* IFA_BROADCAST */
1440                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1441 }
1442
1443 static inline u32 cstamp_delta(unsigned long cstamp)
1444 {
1445         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1446 }
1447
1448 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1449                          unsigned long tstamp, u32 preferred, u32 valid)
1450 {
1451         struct ifa_cacheinfo ci;
1452
1453         ci.cstamp = cstamp_delta(cstamp);
1454         ci.tstamp = cstamp_delta(tstamp);
1455         ci.ifa_prefered = preferred;
1456         ci.ifa_valid = valid;
1457
1458         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1459 }
1460
1461 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1462                             u32 portid, u32 seq, int event, unsigned int flags)
1463 {
1464         struct ifaddrmsg *ifm;
1465         struct nlmsghdr  *nlh;
1466         u32 preferred, valid;
1467
1468         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1469         if (nlh == NULL)
1470                 return -EMSGSIZE;
1471
1472         ifm = nlmsg_data(nlh);
1473         ifm->ifa_family = AF_INET;
1474         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1475         ifm->ifa_flags = ifa->ifa_flags;
1476         ifm->ifa_scope = ifa->ifa_scope;
1477         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1478
1479         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1480                 preferred = ifa->ifa_preferred_lft;
1481                 valid = ifa->ifa_valid_lft;
1482                 if (preferred != INFINITY_LIFE_TIME) {
1483                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1484
1485                         if (preferred > tval)
1486                                 preferred -= tval;
1487                         else
1488                                 preferred = 0;
1489                         if (valid != INFINITY_LIFE_TIME) {
1490                                 if (valid > tval)
1491                                         valid -= tval;
1492                                 else
1493                                         valid = 0;
1494                         }
1495                 }
1496         } else {
1497                 preferred = INFINITY_LIFE_TIME;
1498                 valid = INFINITY_LIFE_TIME;
1499         }
1500         if ((ifa->ifa_address &&
1501              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1502             (ifa->ifa_local &&
1503              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1504             (ifa->ifa_broadcast &&
1505              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1506             (ifa->ifa_label[0] &&
1507              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1508             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1509                           preferred, valid))
1510                 goto nla_put_failure;
1511
1512         return nlmsg_end(skb, nlh);
1513
1514 nla_put_failure:
1515         nlmsg_cancel(skb, nlh);
1516         return -EMSGSIZE;
1517 }
1518
/*
 * Netlink dump callback: emit an RTM_NEWADDR message (NLM_F_MULTI) for
 * every IPv4 address on every device of the socket's namespace.
 *
 * The dump is resumable.  The position reached is kept in cb->args:
 *   args[0] - bucket of net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 *
 * Returns skb->len.
 */
1519 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1520 {
1521         struct net *net = sock_net(skb->sk);
1522         int h, s_h;
1523         int idx, s_idx;
1524         int ip_idx, s_ip_idx;
1525         struct net_device *dev;
1526         struct in_device *in_dev;
1527         struct in_ifaddr *ifa;
1528         struct hlist_head *head;
1529
             /* Pick up where the previous invocation stopped. */
1530         s_h = cb->args[0];
1531         s_idx = idx = cb->args[1];
1532         s_ip_idx = ip_idx = cb->args[2];
1533
1534         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1535                 idx = 0;
1536                 head = &net->dev_index_head[h];
1537                 rcu_read_lock();
                     /* Sequence value used by nl_dump_check_consistent()
                      * below.
                      */
1538                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1539                           net->dev_base_seq;
1540                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1541                         if (idx < s_idx)
1542                                 goto cont;
                             /* The saved address index only applies to the
                              * device the previous pass stopped on.
                              */
1543                         if (h > s_h || idx > s_idx)
1544                                 s_ip_idx = 0;
1545                         in_dev = __in_dev_get_rcu(dev);
1546                         if (!in_dev)
1547                                 goto cont;
1548
1549                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1550                              ifa = ifa->ifa_next, ip_idx++) {
1551                                 if (ip_idx < s_ip_idx)
1552                                         continue;
                                     /* Stop on a non-positive result and
                                      * save the position for the next call.
                                      */
1553                                 if (inet_fill_ifaddr(skb, ifa,
1554                                              NETLINK_CB(cb->skb).portid,
1555                                              cb->nlh->nlmsg_seq,
1556                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1557                                         rcu_read_unlock();
1558                                         goto done;
1559                                 }
1560                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1561                         }
1562 cont:
1563                         idx++;
1564                 }
1565                 rcu_read_unlock();
1566         }
1567
1568 done:
1569         cb->args[0] = h;
1570         cb->args[1] = idx;
1571         cb->args[2] = ip_idx;
1572
1573         return skb->len;
1574 }
1575
1576 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1577                       u32 portid)
1578 {
1579         struct sk_buff *skb;
1580         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1581         int err = -ENOBUFS;
1582         struct net *net;
1583
1584         net = dev_net(ifa->ifa_dev->dev);
1585         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1586         if (skb == NULL)
1587                 goto errout;
1588
1589         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1590         if (err < 0) {
1591                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1592                 WARN_ON(err == -EMSGSIZE);
1593                 kfree_skb(skb);
1594                 goto errout;
1595         }
1596         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1597         return;
1598 errout:
1599         if (err < 0)
1600                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1601 }
1602
1603 static size_t inet_get_link_af_size(const struct net_device *dev)
1604 {
1605         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1606
1607         if (!in_dev)
1608                 return 0;
1609
1610         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1611 }
1612
1613 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1614 {
1615         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1616         struct nlattr *nla;
1617         int i;
1618
1619         if (!in_dev)
1620                 return -ENODATA;
1621
1622         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1623         if (nla == NULL)
1624                 return -EMSGSIZE;
1625
1626         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1627                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1628
1629         return 0;
1630 }
1631
/* Attribute policy for the IFLA_INET_* attributes: IFLA_INET_CONF must
 * be a nested attribute.  Used by inet_validate_link_af().
 */
1632 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1633         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1634 };
1635
1636 static int inet_validate_link_af(const struct net_device *dev,
1637                                  const struct nlattr *nla)
1638 {
1639         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1640         int err, rem;
1641
1642         if (dev && !__in_dev_get_rtnl(dev))
1643                 return -EAFNOSUPPORT;
1644
1645         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1646         if (err < 0)
1647                 return err;
1648
1649         if (tb[IFLA_INET_CONF]) {
1650                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1651                         int cfgid = nla_type(a);
1652
1653                         if (nla_len(a) < 4)
1654                                 return -EINVAL;
1655
1656                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1657                                 return -EINVAL;
1658                 }
1659         }
1660
1661         return 0;
1662 }
1663
1664 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1665 {
1666         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1667         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1668         int rem;
1669
1670         if (!in_dev)
1671                 return -EAFNOSUPPORT;
1672
1673         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1674                 BUG();
1675
1676         if (tb[IFLA_INET_CONF]) {
1677                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1678                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1679         }
1680
1681         return 0;
1682 }
1683
1684 static int inet_netconf_msgsize_devconf(int type)
1685 {
1686         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1687                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1688
1689         /* type -1 is used for ALL */
1690         if (type == -1 || type == NETCONFA_FORWARDING)
1691                 size += nla_total_size(4);
1692         if (type == -1 || type == NETCONFA_RP_FILTER)
1693                 size += nla_total_size(4);
1694         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1695                 size += nla_total_size(4);
1696
1697         return size;
1698 }
1699
1700 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1701                                      struct ipv4_devconf *devconf, u32 portid,
1702                                      u32 seq, int event, unsigned int flags,
1703                                      int type)
1704 {
1705         struct nlmsghdr  *nlh;
1706         struct netconfmsg *ncm;
1707
1708         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1709                         flags);
1710         if (nlh == NULL)
1711                 return -EMSGSIZE;
1712
1713         ncm = nlmsg_data(nlh);
1714         ncm->ncm_family = AF_INET;
1715
1716         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1717                 goto nla_put_failure;
1718
1719         /* type -1 is used for ALL */
1720         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1721             nla_put_s32(skb, NETCONFA_FORWARDING,
1722                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1723                 goto nla_put_failure;
1724         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1725             nla_put_s32(skb, NETCONFA_RP_FILTER,
1726                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1727                 goto nla_put_failure;
1728         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1729             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1730                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1731                 goto nla_put_failure;
1732
1733         return nlmsg_end(skb, nlh);
1734
1735 nla_put_failure:
1736         nlmsg_cancel(skb, nlh);
1737         return -EMSGSIZE;
1738 }
1739
1740 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1741                                  struct ipv4_devconf *devconf)
1742 {
1743         struct sk_buff *skb;
1744         int err = -ENOBUFS;
1745
1746         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1747         if (skb == NULL)
1748                 goto errout;
1749
1750         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1751                                         RTM_NEWNETCONF, 0, type);
1752         if (err < 0) {
1753                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1754                 WARN_ON(err == -EMSGSIZE);
1755                 kfree_skb(skb);
1756                 goto errout;
1757         }
1758         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1759         return;
1760 errout:
1761         if (err < 0)
1762                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1763 }
1764
1765 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1766         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1767         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1768         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1769 };
1770
1771 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1772                                     struct nlmsghdr *nlh)
1773 {
1774         struct net *net = sock_net(in_skb->sk);
1775         struct nlattr *tb[NETCONFA_MAX+1];
1776         struct netconfmsg *ncm;
1777         struct sk_buff *skb;
1778         struct ipv4_devconf *devconf;
1779         struct in_device *in_dev;
1780         struct net_device *dev;
1781         int ifindex;
1782         int err;
1783
1784         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1785                           devconf_ipv4_policy);
1786         if (err < 0)
1787                 goto errout;
1788
1789         err = EINVAL;
1790         if (!tb[NETCONFA_IFINDEX])
1791                 goto errout;
1792
1793         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1794         switch (ifindex) {
1795         case NETCONFA_IFINDEX_ALL:
1796                 devconf = net->ipv4.devconf_all;
1797                 break;
1798         case NETCONFA_IFINDEX_DEFAULT:
1799                 devconf = net->ipv4.devconf_dflt;
1800                 break;
1801         default:
1802                 dev = __dev_get_by_index(net, ifindex);
1803                 if (dev == NULL)
1804                         goto errout;
1805                 in_dev = __in_dev_get_rtnl(dev);
1806                 if (in_dev == NULL)
1807                         goto errout;
1808                 devconf = &in_dev->cnf;
1809                 break;
1810         }
1811
1812         err = -ENOBUFS;
1813         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1814         if (skb == NULL)
1815                 goto errout;
1816
1817         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1818                                         NETLINK_CB(in_skb).portid,
1819                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1820                                         -1);
1821         if (err < 0) {
1822                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1823                 WARN_ON(err == -EMSGSIZE);
1824                 kfree_skb(skb);
1825                 goto errout;
1826         }
1827         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1828 errout:
1829         return err;
1830 }
1831
1832 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1833                                      struct netlink_callback *cb)
1834 {
1835         struct net *net = sock_net(skb->sk);
1836         int h, s_h;
1837         int idx, s_idx;
1838         struct net_device *dev;
1839         struct in_device *in_dev;
1840         struct hlist_head *head;
1841
1842         s_h = cb->args[0];
1843         s_idx = idx = cb->args[1];
1844
1845         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1846                 idx = 0;
1847                 head = &net->dev_index_head[h];
1848                 rcu_read_lock();
1849                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1850                           net->dev_base_seq;
1851                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1852                         if (idx < s_idx)
1853                                 goto cont;
1854                         in_dev = __in_dev_get_rcu(dev);
1855                         if (!in_dev)
1856                                 goto cont;
1857
1858                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1859                                                       &in_dev->cnf,
1860                                                       NETLINK_CB(cb->skb).portid,
1861                                                       cb->nlh->nlmsg_seq,
1862                                                       RTM_NEWNETCONF,
1863                                                       NLM_F_MULTI,
1864                                                       -1) <= 0) {
1865                                 rcu_read_unlock();
1866                                 goto done;
1867                         }
1868                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1869 cont:
1870                         idx++;
1871                 }
1872                 rcu_read_unlock();
1873         }
1874         if (h == NETDEV_HASHENTRIES) {
1875                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1876                                               net->ipv4.devconf_all,
1877                                               NETLINK_CB(cb->skb).portid,
1878                                               cb->nlh->nlmsg_seq,
1879                                               RTM_NEWNETCONF, NLM_F_MULTI,
1880                                               -1) <= 0)
1881                         goto done;
1882                 else
1883                         h++;
1884         }
1885         if (h == NETDEV_HASHENTRIES + 1) {
1886                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1887                                               net->ipv4.devconf_dflt,
1888                                               NETLINK_CB(cb->skb).portid,
1889                                               cb->nlh->nlmsg_seq,
1890                                               RTM_NEWNETCONF, NLM_F_MULTI,
1891                                               -1) <= 0)
1892                         goto done;
1893                 else
1894                         h++;
1895         }
1896 done:
1897         cb->args[0] = h;
1898         cb->args[1] = idx;
1899
1900         return skb->len;
1901 }
1902
1903 #ifdef CONFIG_SYSCTL
1904
1905 static void devinet_copy_dflt_conf(struct net *net, int i)
1906 {
1907         struct net_device *dev;
1908
1909         rcu_read_lock();
1910         for_each_netdev_rcu(net, dev) {
1911                 struct in_device *in_dev;
1912
1913                 in_dev = __in_dev_get_rcu(dev);
1914                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1915                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1916         }
1917         rcu_read_unlock();
1918 }
1919
1920 /* called with RTNL locked */
1921 static void inet_forward_change(struct net *net)
1922 {
1923         struct net_device *dev;
1924         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1925
1926         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1927         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1928         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1929                                     NETCONFA_IFINDEX_ALL,
1930                                     net->ipv4.devconf_all);
1931         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1932                                     NETCONFA_IFINDEX_DEFAULT,
1933                                     net->ipv4.devconf_dflt);
1934
1935         for_each_netdev(net, dev) {
1936                 struct in_device *in_dev;
1937                 if (on)
1938                         dev_disable_lro(dev);
1939                 rcu_read_lock();
1940                 in_dev = __in_dev_get_rcu(dev);
1941                 if (in_dev) {
1942                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1943                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944                                                     dev->ifindex, &in_dev->cnf);
1945                 }
1946                 rcu_read_unlock();
1947         }
1948 }
1949
1950 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1951                              void __user *buffer,
1952                              size_t *lenp, loff_t *ppos)
1953 {
1954         int old_value = *(int *)ctl->data;
1955         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1956         int new_value = *(int *)ctl->data;
1957
1958         if (write) {
1959                 struct ipv4_devconf *cnf = ctl->extra1;
1960                 struct net *net = ctl->extra2;
1961                 int i = (int *)ctl->data - cnf->data;
1962
1963                 set_bit(i, cnf->state);
1964
1965                 if (cnf == net->ipv4.devconf_dflt)
1966                         devinet_copy_dflt_conf(net, i);
1967                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1968                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1969                         if ((new_value == 0) && (old_value != 0))
1970                                 rt_cache_flush(net);
1971                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1972                     new_value != old_value) {
1973                         int ifindex;
1974
1975                         if (cnf == net->ipv4.devconf_dflt)
1976                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1977                         else if (cnf == net->ipv4.devconf_all)
1978                                 ifindex = NETCONFA_IFINDEX_ALL;
1979                         else {
1980                                 struct in_device *idev =
1981                                         container_of(cnf, struct in_device,
1982                                                      cnf);
1983                                 ifindex = idev->dev->ifindex;
1984                         }
1985                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1986                                                     ifindex, cnf);
1987                 }
1988         }
1989
1990         return ret;
1991 }
1992
1993 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
1994                                   void __user *buffer,
1995                                   size_t *lenp, loff_t *ppos)
1996 {
1997         int *valp = ctl->data;
1998         int val = *valp;
1999         loff_t pos = *ppos;
2000         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2001
2002         if (write && *valp != val) {
2003                 struct net *net = ctl->extra2;
2004
2005                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2006                         if (!rtnl_trylock()) {
2007                                 /* Restore the original values before restarting */
2008                                 *valp = val;
2009                                 *ppos = pos;
2010                                 return restart_syscall();
2011                         }
2012                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2013                                 inet_forward_change(net);
2014                         } else {
2015                                 struct ipv4_devconf *cnf = ctl->extra1;
2016                                 struct in_device *idev =
2017                                         container_of(cnf, struct in_device, cnf);
2018                                 if (*valp)
2019                                         dev_disable_lro(idev->dev);
2020                                 inet_netconf_notify_devconf(net,
2021                                                             NETCONFA_FORWARDING,
2022                                                             idev->dev->ifindex,
2023                                                             cnf);
2024                         }
2025                         rtnl_unlock();
2026                         rt_cache_flush(net);
2027                 } else
2028                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2029                                                     NETCONFA_IFINDEX_DEFAULT,
2030                                                     net->ipv4.devconf_dflt);
2031         }
2032
2033         return ret;
2034 }
2035
2036 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2037                                 void __user *buffer,
2038                                 size_t *lenp, loff_t *ppos)
2039 {
2040         int *valp = ctl->data;
2041         int val = *valp;
2042         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2043         struct net *net = ctl->extra2;
2044
2045         if (write && *valp != val)
2046                 rt_cache_flush(net);
2047
2048         return ret;
2049 }
2050
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data and .extra1 refer to the global ipv4_devconf template; they are
 * rebased onto the real configuration in __devinet_sysctl_register().
 * IPV4_DEVCONF_<attr> is 1-based, hence the "- 1" when indexing data[].
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)                    \
        {                                                               \
                .procname     = name,                                   \
                .data         = ipv4_devconf.data +                     \
                                IPV4_DEVCONF_ ## attr - 1,              \
                .maxlen       = sizeof(int),                            \
                .mode         = mval,                                   \
                .proc_handler = proc,                                   \
                .extra1       = &ipv4_devconf,                          \
        }

/* Read-write entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)                             \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)                             \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)                  \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)                       \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2073
2074 static struct devinet_sysctl_table {
2075         struct ctl_table_header *sysctl_header;
2076         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2077 } devinet_sysctl = {
2078         .devinet_vars = {
2079                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2080                                              devinet_sysctl_forward),
2081                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2082
2083                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2084                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2085                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2086                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2087                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2088                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2089                                         "accept_source_route"),
2090                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2091                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2092                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2093                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2094                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2095                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2096                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2097                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2098                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2099                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2100                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2101                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2102                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2103                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2104                                         "force_igmp_version"),
2105                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2106                                         "igmpv2_unsolicited_report_interval"),
2107                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2108                                         "igmpv3_unsolicited_report_interval"),
2109
2110                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2111                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2112                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2113                                               "promote_secondaries"),
2114                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2115                                               "route_localnet"),
2116         },
2117 };
2118
2119 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2120                                         struct ipv4_devconf *p)
2121 {
2122         int i;
2123         struct devinet_sysctl_table *t;
2124         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2125
2126         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2127         if (!t)
2128                 goto out;
2129
2130         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2131                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2132                 t->devinet_vars[i].extra1 = p;
2133                 t->devinet_vars[i].extra2 = net;
2134         }
2135
2136         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2137
2138         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2139         if (!t->sysctl_header)
2140                 goto free;
2141
2142         p->sysctl = t;
2143         return 0;
2144
2145 free:
2146         kfree(t);
2147 out:
2148         return -ENOBUFS;
2149 }
2150
2151 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2152 {
2153         struct devinet_sysctl_table *t = cnf->sysctl;
2154
2155         if (t == NULL)
2156                 return;
2157
2158         cnf->sysctl = NULL;
2159         unregister_net_sysctl_table(t->sysctl_header);
2160         kfree(t);
2161 }
2162
2163 static void devinet_sysctl_register(struct in_device *idev)
2164 {
2165         neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2166         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2167                                         &idev->cnf);
2168 }
2169
2170 static void devinet_sysctl_unregister(struct in_device *idev)
2171 {
2172         __devinet_sysctl_unregister(&idev->cnf);
2173         neigh_sysctl_unregister(idev->arp_parms);
2174 }
2175
2176 static struct ctl_table ctl_forward_entry[] = {
2177         {
2178                 .procname       = "ip_forward",
2179                 .data           = &ipv4_devconf.data[
2180                                         IPV4_DEVCONF_FORWARDING - 1],
2181                 .maxlen         = sizeof(int),
2182                 .mode           = 0644,
2183                 .proc_handler   = devinet_sysctl_forward,
2184                 .extra1         = &ipv4_devconf,
2185                 .extra2         = &init_net,
2186         },
2187         { },
2188 };
2189 #endif
2190
2191 static __net_init int devinet_init_net(struct net *net)
2192 {
2193         int err;
2194         struct ipv4_devconf *all, *dflt;
2195 #ifdef CONFIG_SYSCTL
2196         struct ctl_table *tbl = ctl_forward_entry;
2197         struct ctl_table_header *forw_hdr;
2198 #endif
2199
2200         err = -ENOMEM;
2201         all = &ipv4_devconf;
2202         dflt = &ipv4_devconf_dflt;
2203
2204         if (!net_eq(net, &init_net)) {
2205                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2206                 if (all == NULL)
2207                         goto err_alloc_all;
2208
2209                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2210                 if (dflt == NULL)
2211                         goto err_alloc_dflt;
2212
2213 #ifdef CONFIG_SYSCTL
2214                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2215                 if (tbl == NULL)
2216                         goto err_alloc_ctl;
2217
2218                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2219                 tbl[0].extra1 = all;
2220                 tbl[0].extra2 = net;
2221 #endif
2222         }
2223
2224 #ifdef CONFIG_SYSCTL
2225         err = __devinet_sysctl_register(net, "all", all);
2226         if (err < 0)
2227                 goto err_reg_all;
2228
2229         err = __devinet_sysctl_register(net, "default", dflt);
2230         if (err < 0)
2231                 goto err_reg_dflt;
2232
2233         err = -ENOMEM;
2234         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2235         if (forw_hdr == NULL)
2236                 goto err_reg_ctl;
2237         net->ipv4.forw_hdr = forw_hdr;
2238 #endif
2239
2240         net->ipv4.devconf_all = all;
2241         net->ipv4.devconf_dflt = dflt;
2242         return 0;
2243
2244 #ifdef CONFIG_SYSCTL
2245 err_reg_ctl:
2246         __devinet_sysctl_unregister(dflt);
2247 err_reg_dflt:
2248         __devinet_sysctl_unregister(all);
2249 err_reg_all:
2250         if (tbl != ctl_forward_entry)
2251                 kfree(tbl);
2252 err_alloc_ctl:
2253 #endif
2254         if (dflt != &ipv4_devconf_dflt)
2255                 kfree(dflt);
2256 err_alloc_dflt:
2257         if (all != &ipv4_devconf)
2258                 kfree(all);
2259 err_alloc_all:
2260         return err;
2261 }
2262
2263 static __net_exit void devinet_exit_net(struct net *net)
2264 {
2265 #ifdef CONFIG_SYSCTL
2266         struct ctl_table *tbl;
2267
2268         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2269         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2270         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2271         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2272         kfree(tbl);
2273 #endif
2274         kfree(net->ipv4.devconf_dflt);
2275         kfree(net->ipv4.devconf_all);
2276 }
2277
2278 static __net_initdata struct pernet_operations devinet_ops = {
2279         .init = devinet_init_net,
2280         .exit = devinet_exit_net,
2281 };
2282
2283 static struct rtnl_af_ops inet_af_ops = {
2284         .family           = AF_INET,
2285         .fill_link_af     = inet_fill_link_af,
2286         .get_link_af_size = inet_get_link_af_size,
2287         .validate_link_af = inet_validate_link_af,
2288         .set_link_af      = inet_set_link_af,
2289 };
2290
2291 void __init devinet_init(void)
2292 {
2293         int i;
2294
2295         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2296                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2297
2298         register_pernet_subsys(&devinet_ops);
2299
2300         register_gifconf(PF_INET, inet_gifconf);
2301         register_netdevice_notifier(&ip_netdev_notifier);
2302
2303         schedule_delayed_work(&check_lifetime_work, 0);
2304
2305         rtnl_af_register(&inet_af_ops);
2306
2307         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2308         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2309         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2310         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2311                       inet_netconf_dump_devconf, NULL);
2312 }
2313