[IA64] sim: Add casts to avoid assignment warnings
[cascardo/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/*
 * Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesised so that any pointer-valued expression (not just a
 * plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
98 };
99
100 #define IN4_ADDR_HSIZE_SHIFT    8
101 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
102
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145                 if (ifa->ifa_local == addr) {
146                         struct net_device *dev = ifa->ifa_dev->dev;
147
148                         if (!net_eq(dev_net(dev), net))
149                                 continue;
150                         result = dev;
151                         break;
152                 }
153         }
154         if (!result) {
155                 struct flowi4 fl4 = { .daddr = addr };
156                 struct fib_result res = { 0 };
157                 struct fib_table *local;
158
159                 /* Fallback to FIB local table so that communication
160                  * over loopback subnets work.
161                  */
162                 local = fib_get_table(net, RT_TABLE_LOCAL);
163                 if (local &&
164                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165                     res.type == RTN_LOCAL)
166                         result = FIB_RES_DEV(res);
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179                          int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191
192 /* Locks all the inet devices. */
193
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202         if (ifa->ifa_dev)
203                 in_dev_put(ifa->ifa_dev);
204         kfree(ifa);
205 }
206
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214         struct net_device *dev = idev->dev;
215
216         WARN_ON(idev->ifa_list);
217         WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221         dev_put(dev);
222         if (!idev->dead)
223                 pr_err("Freeing alive in_device %p\n", idev);
224         else
225                 kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231         struct in_device *in_dev;
232
233         ASSERT_RTNL();
234
235         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236         if (!in_dev)
237                 goto out;
238         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239                         sizeof(in_dev->cnf));
240         in_dev->cnf.sysctl = NULL;
241         in_dev->dev = dev;
242         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243         if (!in_dev->arp_parms)
244                 goto out_kfree;
245         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246                 dev_disable_lro(dev);
247         /* Reference in_dev->dev */
248         dev_hold(dev);
249         /* Account for reference dev->ip_ptr (below) */
250         in_dev_hold(in_dev);
251
252         devinet_sysctl_register(in_dev);
253         ip_mc_init_dev(in_dev);
254         if (dev->flags & IFF_UP)
255                 ip_mc_up(in_dev);
256
257         /* we can receive as soon as ip_ptr is set -- do this last */
258         rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260         return in_dev;
261 out_kfree:
262         kfree(in_dev);
263         in_dev = NULL;
264         goto out;
265 }
266
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269         struct in_device *idev = container_of(head, struct in_device, rcu_head);
270         in_dev_put(idev);
271 }
272
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275         struct in_ifaddr *ifa;
276         struct net_device *dev;
277
278         ASSERT_RTNL();
279
280         dev = in_dev->dev;
281
282         in_dev->dead = 1;
283
284         ip_mc_destroy_dev(in_dev);
285
286         while ((ifa = in_dev->ifa_list) != NULL) {
287                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288                 inet_free_ifa(ifa);
289         }
290
291         RCU_INIT_POINTER(dev->ip_ptr, NULL);
292
293         devinet_sysctl_unregister(in_dev);
294         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295         arp_ifdown(dev);
296
297         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302         rcu_read_lock();
303         for_primary_ifa(in_dev) {
304                 if (inet_ifa_match(a, ifa)) {
305                         if (!b || inet_ifa_match(b, ifa)) {
306                                 rcu_read_unlock();
307                                 return 1;
308                         }
309                 }
310         } endfor_ifa(in_dev);
311         rcu_read_unlock();
312         return 0;
313 }
314
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316                          int destroy, struct nlmsghdr *nlh, u32 portid)
317 {
318         struct in_ifaddr *promote = NULL;
319         struct in_ifaddr *ifa, *ifa1 = *ifap;
320         struct in_ifaddr *last_prim = in_dev->ifa_list;
321         struct in_ifaddr *prev_prom = NULL;
322         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
323
324         ASSERT_RTNL();
325
326         /* 1. Deleting primary ifaddr forces deletion all secondaries
327          * unless alias promotion is set
328          **/
329
330         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
332
333                 while ((ifa = *ifap1) != NULL) {
334                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335                             ifa1->ifa_scope <= ifa->ifa_scope)
336                                 last_prim = ifa;
337
338                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339                             ifa1->ifa_mask != ifa->ifa_mask ||
340                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
341                                 ifap1 = &ifa->ifa_next;
342                                 prev_prom = ifa;
343                                 continue;
344                         }
345
346                         if (!do_promote) {
347                                 inet_hash_remove(ifa);
348                                 *ifap1 = ifa->ifa_next;
349
350                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351                                 blocking_notifier_call_chain(&inetaddr_chain,
352                                                 NETDEV_DOWN, ifa);
353                                 inet_free_ifa(ifa);
354                         } else {
355                                 promote = ifa;
356                                 break;
357                         }
358                 }
359         }
360
361         /* On promotion all secondaries from subnet are changing
362          * the primary IP, we must remove all their routes silently
363          * and later to add them back with new prefsrc. Do this
364          * while all addresses are on the device list.
365          */
366         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367                 if (ifa1->ifa_mask == ifa->ifa_mask &&
368                     inet_ifa_match(ifa1->ifa_address, ifa))
369                         fib_del_ifaddr(ifa, ifa1);
370         }
371
372         /* 2. Unlink it */
373
374         *ifap = ifa1->ifa_next;
375         inet_hash_remove(ifa1);
376
377         /* 3. Announce address deletion */
378
379         /* Send message first, then call notifier.
380            At first sight, FIB update triggered by notifier
381            will refer to already deleted ifaddr, that could confuse
382            netlink listeners. It is not true: look, gated sees
383            that route deleted and if it still thinks that ifaddr
384            is valid, it will try to restore deleted routes... Grr.
385            So that, this order is correct.
386          */
387         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
389
390         if (promote) {
391                 struct in_ifaddr *next_sec = promote->ifa_next;
392
393                 if (prev_prom) {
394                         prev_prom->ifa_next = promote->ifa_next;
395                         promote->ifa_next = last_prim->ifa_next;
396                         last_prim->ifa_next = promote;
397                 }
398
399                 promote->ifa_flags &= ~IFA_F_SECONDARY;
400                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401                 blocking_notifier_call_chain(&inetaddr_chain,
402                                 NETDEV_UP, promote);
403                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404                         if (ifa1->ifa_mask != ifa->ifa_mask ||
405                             !inet_ifa_match(ifa1->ifa_address, ifa))
406                                         continue;
407                         fib_add_ifaddr(ifa);
408                 }
409
410         }
411         if (destroy)
412                 inet_free_ifa(ifa1);
413 }
414
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416                          int destroy)
417 {
418         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420
static void check_lifetime(struct work_struct *work);

/* Delayed work item that runs check_lifetime(). */
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426                              u32 portid)
427 {
428         struct in_device *in_dev = ifa->ifa_dev;
429         struct in_ifaddr *ifa1, **ifap, **last_primary;
430
431         ASSERT_RTNL();
432
433         if (!ifa->ifa_local) {
434                 inet_free_ifa(ifa);
435                 return 0;
436         }
437
438         ifa->ifa_flags &= ~IFA_F_SECONDARY;
439         last_primary = &in_dev->ifa_list;
440
441         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442              ifap = &ifa1->ifa_next) {
443                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444                     ifa->ifa_scope <= ifa1->ifa_scope)
445                         last_primary = &ifa1->ifa_next;
446                 if (ifa1->ifa_mask == ifa->ifa_mask &&
447                     inet_ifa_match(ifa1->ifa_address, ifa)) {
448                         if (ifa1->ifa_local == ifa->ifa_local) {
449                                 inet_free_ifa(ifa);
450                                 return -EEXIST;
451                         }
452                         if (ifa1->ifa_scope != ifa->ifa_scope) {
453                                 inet_free_ifa(ifa);
454                                 return -EINVAL;
455                         }
456                         ifa->ifa_flags |= IFA_F_SECONDARY;
457                 }
458         }
459
460         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461                 net_srandom(ifa->ifa_local);
462                 ifap = last_primary;
463         }
464
465         ifa->ifa_next = *ifap;
466         *ifap = ifa;
467
468         inet_hash_insert(dev_net(in_dev->dev), ifa);
469
470         cancel_delayed_work(&check_lifetime_work);
471         schedule_delayed_work(&check_lifetime_work, 0);
472
473         /* Send message first, then call notifier.
474            Notifier will trigger FIB update, so that
475            listeners of netlink will know about new ifaddr */
476         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478
479         return 0;
480 }
481
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484         return __inet_insert_ifa(ifa, NULL, 0);
485 }
486
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489         struct in_device *in_dev = __in_dev_get_rtnl(dev);
490
491         ASSERT_RTNL();
492
493         if (!in_dev) {
494                 inet_free_ifa(ifa);
495                 return -ENOBUFS;
496         }
497         ipv4_devconf_setall(in_dev);
498         if (ifa->ifa_dev != in_dev) {
499                 WARN_ON(ifa->ifa_dev);
500                 in_dev_hold(in_dev);
501                 ifa->ifa_dev = in_dev;
502         }
503         if (ipv4_is_loopback(ifa->ifa_local))
504                 ifa->ifa_scope = RT_SCOPE_HOST;
505         return inet_insert_ifa(ifa);
506 }
507
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513         struct net_device *dev;
514         struct in_device *in_dev = NULL;
515
516         rcu_read_lock();
517         dev = dev_get_by_index_rcu(net, ifindex);
518         if (dev)
519                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520         rcu_read_unlock();
521         return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524
525 /* Called only from RTNL semaphored context. No locks. */
526
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528                                     __be32 mask)
529 {
530         ASSERT_RTNL();
531
532         for_primary_ifa(in_dev) {
533                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534                         return ifa;
535         } endfor_ifa(in_dev);
536         return NULL;
537 }
538
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540 {
541         struct net *net = sock_net(skb->sk);
542         struct nlattr *tb[IFA_MAX+1];
543         struct in_device *in_dev;
544         struct ifaddrmsg *ifm;
545         struct in_ifaddr *ifa, **ifap;
546         int err = -EINVAL;
547
548         ASSERT_RTNL();
549
550         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551         if (err < 0)
552                 goto errout;
553
554         ifm = nlmsg_data(nlh);
555         in_dev = inetdev_by_index(net, ifm->ifa_index);
556         if (in_dev == NULL) {
557                 err = -ENODEV;
558                 goto errout;
559         }
560
561         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562              ifap = &ifa->ifa_next) {
563                 if (tb[IFA_LOCAL] &&
564                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565                         continue;
566
567                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568                         continue;
569
570                 if (tb[IFA_ADDRESS] &&
571                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573                         continue;
574
575                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576                 return 0;
577         }
578
579         err = -EADDRNOTAVAIL;
580 errout:
581         return err;
582 }
583
/* Lifetime value that check_lifetime() treats as "never expires". */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
585
586 static void check_lifetime(struct work_struct *work)
587 {
588         unsigned long now, next, next_sec, next_sched;
589         struct in_ifaddr *ifa;
590         struct hlist_node *n;
591         int i;
592
593         now = jiffies;
594         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
595
596         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597                 bool change_needed = false;
598
599                 rcu_read_lock();
600                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
601                         unsigned long age;
602
603                         if (ifa->ifa_flags & IFA_F_PERMANENT)
604                                 continue;
605
606                         /* We try to batch several events at once. */
607                         age = (now - ifa->ifa_tstamp +
608                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
609
610                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
611                             age >= ifa->ifa_valid_lft) {
612                                 change_needed = true;
613                         } else if (ifa->ifa_preferred_lft ==
614                                    INFINITY_LIFE_TIME) {
615                                 continue;
616                         } else if (age >= ifa->ifa_preferred_lft) {
617                                 if (time_before(ifa->ifa_tstamp +
618                                                 ifa->ifa_valid_lft * HZ, next))
619                                         next = ifa->ifa_tstamp +
620                                                ifa->ifa_valid_lft * HZ;
621
622                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
623                                         change_needed = true;
624                         } else if (time_before(ifa->ifa_tstamp +
625                                                ifa->ifa_preferred_lft * HZ,
626                                                next)) {
627                                 next = ifa->ifa_tstamp +
628                                        ifa->ifa_preferred_lft * HZ;
629                         }
630                 }
631                 rcu_read_unlock();
632                 if (!change_needed)
633                         continue;
634                 rtnl_lock();
635                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
636                         unsigned long age;
637
638                         if (ifa->ifa_flags & IFA_F_PERMANENT)
639                                 continue;
640
641                         /* We try to batch several events at once. */
642                         age = (now - ifa->ifa_tstamp +
643                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644
645                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646                             age >= ifa->ifa_valid_lft) {
647                                 struct in_ifaddr **ifap;
648
649                                 for (ifap = &ifa->ifa_dev->ifa_list;
650                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
651                                         if (*ifap == ifa) {
652                                                 inet_del_ifa(ifa->ifa_dev,
653                                                              ifap, 1);
654                                                 break;
655                                         }
656                                 }
657                         } else if (ifa->ifa_preferred_lft !=
658                                    INFINITY_LIFE_TIME &&
659                                    age >= ifa->ifa_preferred_lft &&
660                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
661                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
662                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
663                         }
664                 }
665                 rtnl_unlock();
666         }
667
668         next_sec = round_jiffies_up(next);
669         next_sched = next;
670
671         /* If rounded timeout is accurate enough, accept it. */
672         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
673                 next_sched = next_sec;
674
675         now = jiffies;
676         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
677         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
678                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
679
680         schedule_delayed_work(&check_lifetime_work, next_sched - now);
681 }
682
683 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
684                              __u32 prefered_lft)
685 {
686         unsigned long timeout;
687
688         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
689
690         timeout = addrconf_timeout_fixup(valid_lft, HZ);
691         if (addrconf_finite_timeout(timeout))
692                 ifa->ifa_valid_lft = timeout;
693         else
694                 ifa->ifa_flags |= IFA_F_PERMANENT;
695
696         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
697         if (addrconf_finite_timeout(timeout)) {
698                 if (timeout == 0)
699                         ifa->ifa_flags |= IFA_F_DEPRECATED;
700                 ifa->ifa_preferred_lft = timeout;
701         }
702         ifa->ifa_tstamp = jiffies;
703         if (!ifa->ifa_cstamp)
704                 ifa->ifa_cstamp = ifa->ifa_tstamp;
705 }
706
707 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
708                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
709 {
710         struct nlattr *tb[IFA_MAX+1];
711         struct in_ifaddr *ifa;
712         struct ifaddrmsg *ifm;
713         struct net_device *dev;
714         struct in_device *in_dev;
715         int err;
716
717         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
718         if (err < 0)
719                 goto errout;
720
721         ifm = nlmsg_data(nlh);
722         err = -EINVAL;
723         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
724                 goto errout;
725
726         dev = __dev_get_by_index(net, ifm->ifa_index);
727         err = -ENODEV;
728         if (dev == NULL)
729                 goto errout;
730
731         in_dev = __in_dev_get_rtnl(dev);
732         err = -ENOBUFS;
733         if (in_dev == NULL)
734                 goto errout;
735
736         ifa = inet_alloc_ifa();
737         if (ifa == NULL)
738                 /*
739                  * A potential indev allocation can be left alive, it stays
740                  * assigned to its device and is destroy with it.
741                  */
742                 goto errout;
743
744         ipv4_devconf_setall(in_dev);
745         in_dev_hold(in_dev);
746
747         if (tb[IFA_ADDRESS] == NULL)
748                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
749
750         INIT_HLIST_NODE(&ifa->hash);
751         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
752         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
753         ifa->ifa_flags = ifm->ifa_flags;
754         ifa->ifa_scope = ifm->ifa_scope;
755         ifa->ifa_dev = in_dev;
756
757         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
758         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
759
760         if (tb[IFA_BROADCAST])
761                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
762
763         if (tb[IFA_LABEL])
764                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
765         else
766                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
767
768         if (tb[IFA_CACHEINFO]) {
769                 struct ifa_cacheinfo *ci;
770
771                 ci = nla_data(tb[IFA_CACHEINFO]);
772                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
773                         err = -EINVAL;
774                         goto errout;
775                 }
776                 *pvalid_lft = ci->ifa_valid;
777                 *pprefered_lft = ci->ifa_prefered;
778         }
779
780         return ifa;
781
782 errout:
783         return ERR_PTR(err);
784 }
785
786 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
787 {
788         struct in_device *in_dev = ifa->ifa_dev;
789         struct in_ifaddr *ifa1, **ifap;
790
791         if (!ifa->ifa_local)
792                 return NULL;
793
794         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
795              ifap = &ifa1->ifa_next) {
796                 if (ifa1->ifa_mask == ifa->ifa_mask &&
797                     inet_ifa_match(ifa1->ifa_address, ifa) &&
798                     ifa1->ifa_local == ifa->ifa_local)
799                         return ifa1;
800         }
801         return NULL;
802 }
803
804 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
805 {
806         struct net *net = sock_net(skb->sk);
807         struct in_ifaddr *ifa;
808         struct in_ifaddr *ifa_existing;
809         __u32 valid_lft = INFINITY_LIFE_TIME;
810         __u32 prefered_lft = INFINITY_LIFE_TIME;
811
812         ASSERT_RTNL();
813
814         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
815         if (IS_ERR(ifa))
816                 return PTR_ERR(ifa);
817
818         ifa_existing = find_matching_ifa(ifa);
819         if (!ifa_existing) {
820                 /* It would be best to check for !NLM_F_CREATE here but
821                  * userspace alreay relies on not having to provide this.
822                  */
823                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
824                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
825         } else {
826                 inet_free_ifa(ifa);
827
828                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
829                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
830                         return -EEXIST;
831                 ifa = ifa_existing;
832                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833                 cancel_delayed_work(&check_lifetime_work);
834                 schedule_delayed_work(&check_lifetime_work, 0);
835                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
836                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
837         }
838         return 0;
839 }
840
841 /*
842  *      Determine a default network mask, based on the IP address.
843  */
844
845 static int inet_abc_len(__be32 addr)
846 {
847         int rc = -1;    /* Something else, probably a multicast. */
848
849         if (ipv4_is_zeronet(addr))
850                 rc = 0;
851         else {
852                 __u32 haddr = ntohl(addr);
853
854                 if (IN_CLASSA(haddr))
855                         rc = 8;
856                 else if (IN_CLASSB(haddr))
857                         rc = 16;
858                 else if (IN_CLASSC(haddr))
859                         rc = 24;
860         }
861
862         return rc;
863 }
864
865
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls.
 * @net: namespace used for the device lookup and the capability check
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value is rejected with -EINVAL
 * @arg: user pointer to a struct ifreq; copied in on entry and copied
 *       back out for the four "get" commands
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT        the ifreq could not be copied from/to user space
 *   -EPERM         a "set" command without CAP_NET_ADMIN in net->user_ns
 *   -EINVAL        unknown command, non-AF_INET family on a "set"
 *                  command, or an address/mask that fails validation
 *   -ENODEV        no device with the requested name
 *   -EADDRNOTAVAIL no matching address (not applicable to SIOCSIFADDR;
 *                  for SIOCSIFFLAGS only when an alias label was given)
 *   -ENOBUFS       inet_alloc_ifa() failed for SIOCSIFADDR
 * or whatever dev_change_flags() / inet_set_ifa() return.
 *
 * The device lookup and all address manipulation run between
 * rtnl_lock() and rtnl_unlock().
 */
866 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
867 {
868         struct ifreq ifr;
869         struct sockaddr_in sin_orig;
870         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
871         struct in_device *in_dev;
872         struct in_ifaddr **ifap = NULL;
873         struct in_ifaddr *ifa = NULL;
874         struct net_device *dev;
875         char *colon;
876         int ret = -EFAULT;
877         int tryaddrmatch = 0;
878
879         /*
880          *      Fetch the caller's info block into kernel space
881          */
882
883         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
884                 goto out;
885         ifr.ifr_name[IFNAMSIZ - 1] = 0;
886
887         /* save original address for comparison */
888         memcpy(&sin_orig, sin, sizeof(*sin));
889
            /* Temporarily cut the name at ':' so that dev_load() and the
             * device lookup below see only the part before the alias suffix.
             */
890         colon = strchr(ifr.ifr_name, ':');
891         if (colon)
892                 *colon = 0;
893
894         dev_load(net, ifr.ifr_name);
895
            /* First pass over cmd: permission and family checks only. */
896         switch (cmd) {
897         case SIOCGIFADDR:       /* Get interface address */
898         case SIOCGIFBRDADDR:    /* Get the broadcast address */
899         case SIOCGIFDSTADDR:    /* Get the destination address */
900         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
901                 /* Note that these ioctls will not sleep,
902                    so that we do not impose a lock.
903                    One day we will be forced to put shlock here (I mean SMP)
904                  */
905                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
906                 memset(sin, 0, sizeof(*sin));
907                 sin->sin_family = AF_INET;
908                 break;
909
910         case SIOCSIFFLAGS:
911                 ret = -EPERM;
912                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
913                         goto out;
914                 break;
915         case SIOCSIFADDR:       /* Set interface address (and family) */
916         case SIOCSIFBRDADDR:    /* Set the broadcast address */
917         case SIOCSIFDSTADDR:    /* Set the destination address */
918         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
919                 ret = -EPERM;
920                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
921                         goto out;
922                 ret = -EINVAL;
923                 if (sin->sin_family != AF_INET)
924                         goto out;
925                 break;
926         default:
927                 ret = -EINVAL;
928                 goto out;
929         }
930
931         rtnl_lock();
932
933         ret = -ENODEV;
934         dev = __dev_get_by_name(net, ifr.ifr_name);
935         if (!dev)
936                 goto done;
937
            /* Put the ':' back so ifr_name is the full label again for the
             * ifa_label comparisons below.
             */
938         if (colon)
939                 *colon = ':';
940
941         in_dev = __in_dev_get_rtnl(dev);
942         if (in_dev) {
943                 if (tryaddrmatch) {
944                         /* Matthias Andree */
945                         /* compare label and address (4.4BSD style) */
946                         /* note: we only do this for a limited set of ioctls
947                            and only if the original address family was AF_INET.
948                            This is checked above. */
949                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
950                              ifap = &ifa->ifa_next) {
951                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
952                                     sin_orig.sin_addr.s_addr ==
953                                                         ifa->ifa_local) {
954                                         break; /* found */
955                                 }
956                         }
957                 }
958                 /* we didn't get a match, maybe the application is
959                    4.3BSD-style and passed in junk so we fall back to
960                    comparing just the label */
961                 if (!ifa) {
962                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
963                              ifap = &ifa->ifa_next)
964                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
965                                         break;
966                 }
967         }
968
            /* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an
             * existing address; every other command dereferences ifa below.
             */
969         ret = -EADDRNOTAVAIL;
970         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
971                 goto done;
972
            /* Second pass over cmd: perform the actual get/set. */
973         switch (cmd) {
974         case SIOCGIFADDR:       /* Get interface address */
975                 sin->sin_addr.s_addr = ifa->ifa_local;
976                 goto rarok;
977
978         case SIOCGIFBRDADDR:    /* Get the broadcast address */
979                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
980                 goto rarok;
981
982         case SIOCGIFDSTADDR:    /* Get the destination address */
983                 sin->sin_addr.s_addr = ifa->ifa_address;
984                 goto rarok;
985
986         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
987                 sin->sin_addr.s_addr = ifa->ifa_mask;
988                 goto rarok;
989
990         case SIOCSIFFLAGS:
                    /* With an alias label the flags apply to that one
                     * address: clearing IFF_UP deletes it. Without a label
                     * the flags are applied to the device itself.
                     */
991                 if (colon) {
992                         ret = -EADDRNOTAVAIL;
993                         if (!ifa)
994                                 break;
995                         ret = 0;
996                         if (!(ifr.ifr_flags & IFF_UP))
997                                 inet_del_ifa(in_dev, ifap, 1);
998                         break;
999                 }
1000                 ret = dev_change_flags(dev, ifr.ifr_flags);
1001                 break;
1002
1003         case SIOCSIFADDR:       /* Set interface address (and family) */
1004                 ret = -EINVAL;
1005                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1006                         break;
1007
1008                 if (!ifa) {
1009                         ret = -ENOBUFS;
1010                         ifa = inet_alloc_ifa();
1011                         if (!ifa)
1012                                 break;
1013                         INIT_HLIST_NODE(&ifa->hash);
1014                         if (colon)
1015                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1016                         else
1017                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1018                 } else {
1019                         ret = 0;
1020                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1021                                 break;
1022                         inet_del_ifa(in_dev, ifap, 0);
1023                         ifa->ifa_broadcast = 0;
1024                         ifa->ifa_scope = 0;
1025                 }
1026
1027                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1028
                     /* Non point-to-point devices get the prefix length from
                      * inet_abc_len(); point-to-point devices get a /32.
                      */
1029                 if (!(dev->flags & IFF_POINTOPOINT)) {
1030                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1031                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1032                         if ((dev->flags & IFF_BROADCAST) &&
1033                             ifa->ifa_prefixlen < 31)
1034                                 ifa->ifa_broadcast = ifa->ifa_address |
1035                                                      ~ifa->ifa_mask;
1036                 } else {
1037                         ifa->ifa_prefixlen = 32;
1038                         ifa->ifa_mask = inet_make_mask(32);
1039                 }
1040                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1041                 ret = inet_set_ifa(dev, ifa);
1042                 break;
1043
1044         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1045                 ret = 0;
1046                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1047                         inet_del_ifa(in_dev, ifap, 0);
1048                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1049                         inet_insert_ifa(ifa);
1050                 }
1051                 break;
1052
1053         case SIOCSIFDSTADDR:    /* Set the destination address */
1054                 ret = 0;
1055                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1056                         break;
1057                 ret = -EINVAL;
1058                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1059                         break;
1060                 ret = 0;
1061                 inet_del_ifa(in_dev, ifap, 0);
1062                 ifa->ifa_address = sin->sin_addr.s_addr;
1063                 inet_insert_ifa(ifa);
1064                 break;
1065
1066         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1067
1068                 /*
1069                  *      The mask we set must be legal.
1070                  */
1071                 ret = -EINVAL;
1072                 if (bad_mask(sin->sin_addr.s_addr, 0))
1073                         break;
1074                 ret = 0;
1075                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1076                         __be32 old_mask = ifa->ifa_mask;
1077                         inet_del_ifa(in_dev, ifap, 0);
1078                         ifa->ifa_mask = sin->sin_addr.s_addr;
1079                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1080
1081                         /* See if current broadcast address matches
1082                          * with current netmask, then recalculate
1083                          * the broadcast address. Otherwise it's a
1084                          * funny address, so don't touch it since
1085                          * the user seems to know what (s)he's doing...
1086                          */
1087                         if ((dev->flags & IFF_BROADCAST) &&
1088                             (ifa->ifa_prefixlen < 31) &&
1089                             (ifa->ifa_broadcast ==
1090                              (ifa->ifa_local|~old_mask))) {
1091                                 ifa->ifa_broadcast = (ifa->ifa_local |
1092                                                       ~sin->sin_addr.s_addr);
1093                         }
1094                         inet_insert_ifa(ifa);
1095                 }
1096                 break;
1097         }
1098 done:
1099         rtnl_unlock();
1100 out:
1101         return ret;
     /* Exit path of the "get" commands: release RTNL first, then copy the
      * filled-in ifreq back to user space.
      */
1102 rarok:
1103         rtnl_unlock();
1104         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1105         goto out;
1106 }
1107
1108 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1109 {
1110         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1111         struct in_ifaddr *ifa;
1112         struct ifreq ifr;
1113         int done = 0;
1114
1115         if (!in_dev)
1116                 goto out;
1117
1118         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1119                 if (!buf) {
1120                         done += sizeof(ifr);
1121                         continue;
1122                 }
1123                 if (len < (int) sizeof(ifr))
1124                         break;
1125                 memset(&ifr, 0, sizeof(struct ifreq));
1126                 if (ifa->ifa_label)
1127                         strcpy(ifr.ifr_name, ifa->ifa_label);
1128                 else
1129                         strcpy(ifr.ifr_name, dev->name);
1130
1131                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1132                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1133                                                                 ifa->ifa_local;
1134
1135                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1136                         done = -EFAULT;
1137                         break;
1138                 }
1139                 buf  += sizeof(struct ifreq);
1140                 len  -= sizeof(struct ifreq);
1141                 done += sizeof(struct ifreq);
1142         }
1143 out:
1144         return done;
1145 }
1146
1147 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1148 {
1149         __be32 addr = 0;
1150         struct in_device *in_dev;
1151         struct net *net = dev_net(dev);
1152
1153         rcu_read_lock();
1154         in_dev = __in_dev_get_rcu(dev);
1155         if (!in_dev)
1156                 goto no_in_dev;
1157
1158         for_primary_ifa(in_dev) {
1159                 if (ifa->ifa_scope > scope)
1160                         continue;
1161                 if (!dst || inet_ifa_match(dst, ifa)) {
1162                         addr = ifa->ifa_local;
1163                         break;
1164                 }
1165                 if (!addr)
1166                         addr = ifa->ifa_local;
1167         } endfor_ifa(in_dev);
1168
1169         if (addr)
1170                 goto out_unlock;
1171 no_in_dev:
1172
1173         /* Not loopback addresses on loopback should be preferred
1174            in this case. It is importnat that lo is the first interface
1175            in dev_base list.
1176          */
1177         for_each_netdev_rcu(net, dev) {
1178                 in_dev = __in_dev_get_rcu(dev);
1179                 if (!in_dev)
1180                         continue;
1181
1182                 for_primary_ifa(in_dev) {
1183                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1184                             ifa->ifa_scope <= scope) {
1185                                 addr = ifa->ifa_local;
1186                                 goto out_unlock;
1187                         }
1188                 } endfor_ifa(in_dev);
1189         }
1190 out_unlock:
1191         rcu_read_unlock();
1192         return addr;
1193 }
1194 EXPORT_SYMBOL(inet_select_addr);
1195
/*
 * confirm_addr_indev - look for a usable local address on one in_device.
 * @in_dev: device whose addresses are walked with for_ifa()
 * @dst: when non-zero, some address on the device must match it via
 *       inet_ifa_match()
 * @local: when non-zero, the address to confirm; 0 accepts any local
 * @scope: largest ifa_scope value accepted for the returned address
 *
 * Two things are tracked while walking the list: "addr", the first
 * ifa_local that equals @local (or any, if @local is 0) and is within
 * @scope, and "same", which records that some entry matched both @local
 * and @dst (zero values always match).
 *
 * Returns addr if "same" ended up set, otherwise 0.
 */
1196 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1197                               __be32 local, int scope)
1198 {
1199         int same = 0;
1200         __be32 addr = 0;
1201
1202         for_ifa(in_dev) {
                     /* Candidate address: first one equal to local (or any) within scope. */
1203                 if (!addr &&
1204                     (local == ifa->ifa_local || !local) &&
1205                     ifa->ifa_scope <= scope) {
1206                         addr = ifa->ifa_local;
1207                         if (same)
1208                                 break;
1209                 }
1210                 if (!same) {
1211                         same = (!local || inet_ifa_match(local, ifa)) &&
1212                                 (!dst || inet_ifa_match(dst, ifa));
1213                         if (same && addr) {
1214                                 if (local || !dst)
1215                                         break;
1216                                 /* Is the selected addr into dst subnet? */
1217                                 if (inet_ifa_match(addr, ifa))
1218                                         break;
1219                                 /* No, then can we use new local src? */
1220                                 if (ifa->ifa_scope <= scope) {
1221                                         addr = ifa->ifa_local;
1222                                         break;
1223                                 }
1224                                 /* search for large dst subnet for addr */
1225                                 same = 0;
1226                         }
1227                 }
1228         } endfor_ifa(in_dev);
1229
1230         return same ? addr : 0;
1231 }
1232
1233 /*
1234  * Confirm that local IP address exists using wildcards:
1235  * - in_dev: only on this interface, 0=any interface
1236  * - dst: only in the same subnet as dst, 0=any dst
1237  * - local: address, 0=autoselect the local address
1238  * - scope: maximum allowed scope value for the local address
1239  */
1240 __be32 inet_confirm_addr(struct in_device *in_dev,
1241                          __be32 dst, __be32 local, int scope)
1242 {
1243         __be32 addr = 0;
1244         struct net_device *dev;
1245         struct net *net;
1246
1247         if (scope != RT_SCOPE_LINK)
1248                 return confirm_addr_indev(in_dev, dst, local, scope);
1249
1250         net = dev_net(in_dev->dev);
1251         rcu_read_lock();
1252         for_each_netdev_rcu(net, dev) {
1253                 in_dev = __in_dev_get_rcu(dev);
1254                 if (in_dev) {
1255                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1256                         if (addr)
1257                                 break;
1258                 }
1259         }
1260         rcu_read_unlock();
1261
1262         return addr;
1263 }
1264 EXPORT_SYMBOL(inet_confirm_addr);
1265
1266 /*
1267  *      Device notifier
1268  */
1269
1270 int register_inetaddr_notifier(struct notifier_block *nb)
1271 {
1272         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1273 }
1274 EXPORT_SYMBOL(register_inetaddr_notifier);
1275
1276 int unregister_inetaddr_notifier(struct notifier_block *nb)
1277 {
1278         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1279 }
1280 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1281
1282 /* Rename ifa_labels for a device name change. Make some effort to preserve
1283  * existing alias numbering and to create unique labels if possible.
1284 */
1285 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1286 {
1287         struct in_ifaddr *ifa;
1288         int named = 0;
1289
1290         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1291                 char old[IFNAMSIZ], *dot;
1292
1293                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1294                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1295                 if (named++ == 0)
1296                         goto skip;
1297                 dot = strchr(old, ':');
1298                 if (dot == NULL) {
1299                         sprintf(old, ":%d", named);
1300                         dot = old;
1301                 }
1302                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1303                         strcat(ifa->ifa_label, dot);
1304                 else
1305                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1306 skip:
1307                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1308         }
1309 }
1310
/*
 * inetdev_valid_mtu - is @mtu large enough to run IPv4 on the device?
 *
 * Returns true for an MTU of at least 68 bytes (presumably the RFC 791
 * minimum datagram size), false otherwise.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1315
1316 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1317                                         struct in_device *in_dev)
1318
1319 {
1320         struct in_ifaddr *ifa;
1321
1322         for (ifa = in_dev->ifa_list; ifa;
1323              ifa = ifa->ifa_next) {
1324                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1325                          ifa->ifa_local, dev,
1326                          ifa->ifa_local, NULL,
1327                          dev->dev_addr, NULL);
1328         }
1329 }
1330
1331 /* Called only under RTNL semaphore */
1332
/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 * @this: the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr: the struct net_device the event refers to
 *
 * If the device has no in_device yet, only NETDEV_REGISTER (create one,
 * with NOXFRM/NOPOLICY set on loopback devices) and NETDEV_CHANGEMTU
 * (create one if the new MTU is valid) do anything.  Otherwise the
 * event is dispatched by the switch below.
 *
 * Returns NOTIFY_DONE, except that a failed inetdev_init() on
 * NETDEV_REGISTER returns notifier_from_errno(-ENOMEM).
 */
1333 static int inetdev_event(struct notifier_block *this, unsigned long event,
1334                          void *ptr)
1335 {
1336         struct net_device *dev = ptr;
1337         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1338
1339         ASSERT_RTNL();
1340
1341         if (!in_dev) {
1342                 if (event == NETDEV_REGISTER) {
1343                         in_dev = inetdev_init(dev);
1344                         if (!in_dev)
1345                                 return notifier_from_errno(-ENOMEM);
1346                         if (dev->flags & IFF_LOOPBACK) {
1347                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1348                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1349                         }
1350                 } else if (event == NETDEV_CHANGEMTU) {
1351                         /* Re-enabling IP */
1352                         if (inetdev_valid_mtu(dev->mtu))
1353                                 in_dev = inetdev_init(dev);
1354                 }
1355                 goto out;
1356         }
1357
1358         switch (event) {
1359         case NETDEV_REGISTER:
                     /* An in_device already exists at registration time:
                      * log it and clear dev->ip_ptr.
                      */
1360                 pr_debug("%s: bug\n", __func__);
1361                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1362                 break;
1363         case NETDEV_UP:
1364                 if (!inetdev_valid_mtu(dev->mtu))
1365                         break;
1366                 if (dev->flags & IFF_LOOPBACK) {
                             /* Give a loopback device INADDR_LOOPBACK/8 with
                              * host scope and infinite lifetimes.
                              */
1367                         struct in_ifaddr *ifa = inet_alloc_ifa();
1368
1369                         if (ifa) {
1370                                 INIT_HLIST_NODE(&ifa->hash);
1371                                 ifa->ifa_local =
1372                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1373                                 ifa->ifa_prefixlen = 8;
1374                                 ifa->ifa_mask = inet_make_mask(8);
1375                                 in_dev_hold(in_dev);
1376                                 ifa->ifa_dev = in_dev;
1377                                 ifa->ifa_scope = RT_SCOPE_HOST;
1378                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1379                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1380                                                  INFINITY_LIFE_TIME);
1381                                 inet_insert_ifa(ifa);
1382                         }
1383                 }
1384                 ip_mc_up(in_dev);
1385                 /* fall through */
1386         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * ARP_NOTIFY setting is enabled for this device.
                      */
1387                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1388                         break;
1389                 /* fall through */
1390         case NETDEV_NOTIFY_PEERS:
1391                 /* Send gratuitous ARP to notify of link change */
1392                 inetdev_send_gratuitous_arp(dev, in_dev);
1393                 break;
1394         case NETDEV_DOWN:
1395                 ip_mc_down(in_dev);
1396                 break;
1397         case NETDEV_PRE_TYPE_CHANGE:
1398                 ip_mc_unmap(in_dev);
1399                 break;
1400         case NETDEV_POST_TYPE_CHANGE:
1401                 ip_mc_remap(in_dev);
1402                 break;
1403         case NETDEV_CHANGEMTU:
1404                 if (inetdev_valid_mtu(dev->mtu))
1405                         break;
1406                 /* disable IP when MTU is not enough */
                     /* fall through: an invalid MTU destroys the in_device */
1407         case NETDEV_UNREGISTER:
1408                 inetdev_destroy(in_dev);
1409                 break;
1410         case NETDEV_CHANGENAME:
1411                 /* Do not notify about label change, this event is
1412                  * not interesting to applications using netlink.
1413                  */
1414                 inetdev_changename(dev, in_dev);
1415
                     /* Re-register the sysctl entries for this in_device after
                      * the rename.
                      */
1416                 devinet_sysctl_unregister(in_dev);
1417                 devinet_sysctl_register(in_dev);
1418                 break;
1419         }
1420 out:
1421         return NOTIFY_DONE;
1422 }
1423
/* Notifier block whose callback is inetdev_event(). */
1424 static struct notifier_block ip_netdev_notifier = {
1425         .notifier_call = inetdev_event,
1426 };
1427
1428 static size_t inet_nlmsg_size(void)
1429 {
1430         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1431                + nla_total_size(4) /* IFA_ADDRESS */
1432                + nla_total_size(4) /* IFA_LOCAL */
1433                + nla_total_size(4) /* IFA_BROADCAST */
1434                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1435 }
1436
1437 static inline u32 cstamp_delta(unsigned long cstamp)
1438 {
1439         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1440 }
1441
1442 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1443                          unsigned long tstamp, u32 preferred, u32 valid)
1444 {
1445         struct ifa_cacheinfo ci;
1446
1447         ci.cstamp = cstamp_delta(cstamp);
1448         ci.tstamp = cstamp_delta(tstamp);
1449         ci.ifa_prefered = preferred;
1450         ci.ifa_valid = valid;
1451
1452         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1453 }
1454
1455 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1456                             u32 portid, u32 seq, int event, unsigned int flags)
1457 {
1458         struct ifaddrmsg *ifm;
1459         struct nlmsghdr  *nlh;
1460         u32 preferred, valid;
1461
1462         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1463         if (nlh == NULL)
1464                 return -EMSGSIZE;
1465
1466         ifm = nlmsg_data(nlh);
1467         ifm->ifa_family = AF_INET;
1468         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1469         ifm->ifa_flags = ifa->ifa_flags;
1470         ifm->ifa_scope = ifa->ifa_scope;
1471         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1472
1473         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1474                 preferred = ifa->ifa_preferred_lft;
1475                 valid = ifa->ifa_valid_lft;
1476                 if (preferred != INFINITY_LIFE_TIME) {
1477                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1478
1479                         if (preferred > tval)
1480                                 preferred -= tval;
1481                         else
1482                                 preferred = 0;
1483                         if (valid != INFINITY_LIFE_TIME) {
1484                                 if (valid > tval)
1485                                         valid -= tval;
1486                                 else
1487                                         valid = 0;
1488                         }
1489                 }
1490         } else {
1491                 preferred = INFINITY_LIFE_TIME;
1492                 valid = INFINITY_LIFE_TIME;
1493         }
1494         if ((ifa->ifa_address &&
1495              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1496             (ifa->ifa_local &&
1497              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1498             (ifa->ifa_broadcast &&
1499              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1500             (ifa->ifa_label[0] &&
1501              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1502             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1503                           preferred, valid))
1504                 goto nla_put_failure;
1505
1506         return nlmsg_end(skb, nlh);
1507
1508 nla_put_failure:
1509         nlmsg_cancel(skb, nlh);
1510         return -EMSGSIZE;
1511 }
1512
/*
 * inet_dump_ifaddr - netlink dump callback listing all IPv4 addresses.
 * @skb: buffer to fill with RTM_NEWADDR messages
 * @cb: dump state; cb->args[0..2] hold the resume position
 *
 * The position is a triple: device-index hash bucket (args[0]), device
 * index within that bucket (args[1]) and address index within that
 * device (args[2]).  Filling stops as soon as inet_fill_ifaddr() reports
 * that a message did not fit; the position reached is stored back into
 * cb->args so that the next call continues from there.
 *
 * Returns skb->len.
 */
1513 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1514 {
1515         struct net *net = sock_net(skb->sk);
1516         int h, s_h;
1517         int idx, s_idx;
1518         int ip_idx, s_ip_idx;
1519         struct net_device *dev;
1520         struct in_device *in_dev;
1521         struct in_ifaddr *ifa;
1522         struct hlist_head *head;
1523
             /* s_* are the saved start positions from the previous call. */
1524         s_h = cb->args[0];
1525         s_idx = idx = cb->args[1];
1526         s_ip_idx = ip_idx = cb->args[2];
1527
1528         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1529                 idx = 0;
1530                 head = &net->dev_index_head[h];
1531                 rcu_read_lock();
                     /* Sequence value consumed by nl_dump_check_consistent() below. */
1532                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1533                           net->dev_base_seq;
1534                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1535                         if (idx < s_idx)
1536                                 goto cont;
                             /* Past the resume device: start at its first address. */
1537                         if (h > s_h || idx > s_idx)
1538                                 s_ip_idx = 0;
1539                         in_dev = __in_dev_get_rcu(dev);
1540                         if (!in_dev)
1541                                 goto cont;
1542
1543                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1544                              ifa = ifa->ifa_next, ip_idx++) {
1545                                 if (ip_idx < s_ip_idx)
1546                                         continue;
1547                                 if (inet_fill_ifaddr(skb, ifa,
1548                                              NETLINK_CB(cb->skb).portid,
1549                                              cb->nlh->nlmsg_seq,
1550                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1551                                         rcu_read_unlock();
1552                                         goto done;
1553                                 }
1554                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1555                         }
1556 cont:
1557                         idx++;
1558                 }
1559                 rcu_read_unlock();
1560         }
1561
1562 done:
             /* Record where to resume on the next invocation. */
1563         cb->args[0] = h;
1564         cb->args[1] = idx;
1565         cb->args[2] = ip_idx;
1566
1567         return skb->len;
1568 }
1569
1570 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1571                       u32 portid)
1572 {
1573         struct sk_buff *skb;
1574         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1575         int err = -ENOBUFS;
1576         struct net *net;
1577
1578         net = dev_net(ifa->ifa_dev->dev);
1579         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1580         if (skb == NULL)
1581                 goto errout;
1582
1583         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1584         if (err < 0) {
1585                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1586                 WARN_ON(err == -EMSGSIZE);
1587                 kfree_skb(skb);
1588                 goto errout;
1589         }
1590         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1591         return;
1592 errout:
1593         if (err < 0)
1594                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1595 }
1596
1597 static size_t inet_get_link_af_size(const struct net_device *dev)
1598 {
1599         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1600
1601         if (!in_dev)
1602                 return 0;
1603
1604         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1605 }
1606
1607 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1608 {
1609         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1610         struct nlattr *nla;
1611         int i;
1612
1613         if (!in_dev)
1614                 return -ENODATA;
1615
1616         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1617         if (nla == NULL)
1618                 return -EMSGSIZE;
1619
1620         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1621                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1622
1623         return 0;
1624 }
1625
/* Attribute policy passed to nla_parse_nested() in inet_validate_link_af():
 * IFLA_INET_CONF must itself be a nested attribute.
 */
1626 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1627         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1628 };
1629
1630 static int inet_validate_link_af(const struct net_device *dev,
1631                                  const struct nlattr *nla)
1632 {
1633         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1634         int err, rem;
1635
1636         if (dev && !__in_dev_get_rtnl(dev))
1637                 return -EAFNOSUPPORT;
1638
1639         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1640         if (err < 0)
1641                 return err;
1642
1643         if (tb[IFLA_INET_CONF]) {
1644                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1645                         int cfgid = nla_type(a);
1646
1647                         if (nla_len(a) < 4)
1648                                 return -EINVAL;
1649
1650                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1651                                 return -EINVAL;
1652                 }
1653         }
1654
1655         return 0;
1656 }
1657
1658 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1659 {
1660         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1661         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1662         int rem;
1663
1664         if (!in_dev)
1665                 return -EAFNOSUPPORT;
1666
1667         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1668                 BUG();
1669
1670         if (tb[IFLA_INET_CONF]) {
1671                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1672                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1673         }
1674
1675         return 0;
1676 }
1677
1678 static int inet_netconf_msgsize_devconf(int type)
1679 {
1680         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1681                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1682
1683         /* type -1 is used for ALL */
1684         if (type == -1 || type == NETCONFA_FORWARDING)
1685                 size += nla_total_size(4);
1686         if (type == -1 || type == NETCONFA_RP_FILTER)
1687                 size += nla_total_size(4);
1688         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1689                 size += nla_total_size(4);
1690
1691         return size;
1692 }
1693
1694 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1695                                      struct ipv4_devconf *devconf, u32 portid,
1696                                      u32 seq, int event, unsigned int flags,
1697                                      int type)
1698 {
1699         struct nlmsghdr  *nlh;
1700         struct netconfmsg *ncm;
1701
1702         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1703                         flags);
1704         if (nlh == NULL)
1705                 return -EMSGSIZE;
1706
1707         ncm = nlmsg_data(nlh);
1708         ncm->ncm_family = AF_INET;
1709
1710         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1711                 goto nla_put_failure;
1712
1713         /* type -1 is used for ALL */
1714         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1715             nla_put_s32(skb, NETCONFA_FORWARDING,
1716                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1717                 goto nla_put_failure;
1718         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1719             nla_put_s32(skb, NETCONFA_RP_FILTER,
1720                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1721                 goto nla_put_failure;
1722         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1723             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1724                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1725                 goto nla_put_failure;
1726
1727         return nlmsg_end(skb, nlh);
1728
1729 nla_put_failure:
1730         nlmsg_cancel(skb, nlh);
1731         return -EMSGSIZE;
1732 }
1733
1734 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1735                                  struct ipv4_devconf *devconf)
1736 {
1737         struct sk_buff *skb;
1738         int err = -ENOBUFS;
1739
1740         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1741         if (skb == NULL)
1742                 goto errout;
1743
1744         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1745                                         RTM_NEWNETCONF, 0, type);
1746         if (err < 0) {
1747                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1748                 WARN_ON(err == -EMSGSIZE);
1749                 kfree_skb(skb);
1750                 goto errout;
1751         }
1752         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1753         return;
1754 errout:
1755         if (err < 0)
1756                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1757 }
1758
/*
 * Netlink policy used by inet_netconf_get_devconf() when parsing an
 * RTM_GETNETCONF request; each listed attribute has .len set to sizeof(int).
 */
1759 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1760         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1761         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1762         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1763 };
1764
1765 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1766                                     struct nlmsghdr *nlh)
1767 {
1768         struct net *net = sock_net(in_skb->sk);
1769         struct nlattr *tb[NETCONFA_MAX+1];
1770         struct netconfmsg *ncm;
1771         struct sk_buff *skb;
1772         struct ipv4_devconf *devconf;
1773         struct in_device *in_dev;
1774         struct net_device *dev;
1775         int ifindex;
1776         int err;
1777
1778         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1779                           devconf_ipv4_policy);
1780         if (err < 0)
1781                 goto errout;
1782
1783         err = EINVAL;
1784         if (!tb[NETCONFA_IFINDEX])
1785                 goto errout;
1786
1787         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1788         switch (ifindex) {
1789         case NETCONFA_IFINDEX_ALL:
1790                 devconf = net->ipv4.devconf_all;
1791                 break;
1792         case NETCONFA_IFINDEX_DEFAULT:
1793                 devconf = net->ipv4.devconf_dflt;
1794                 break;
1795         default:
1796                 dev = __dev_get_by_index(net, ifindex);
1797                 if (dev == NULL)
1798                         goto errout;
1799                 in_dev = __in_dev_get_rtnl(dev);
1800                 if (in_dev == NULL)
1801                         goto errout;
1802                 devconf = &in_dev->cnf;
1803                 break;
1804         }
1805
1806         err = -ENOBUFS;
1807         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1808         if (skb == NULL)
1809                 goto errout;
1810
1811         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1812                                         NETLINK_CB(in_skb).portid,
1813                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1814                                         -1);
1815         if (err < 0) {
1816                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1817                 WARN_ON(err == -EMSGSIZE);
1818                 kfree_skb(skb);
1819                 goto errout;
1820         }
1821         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1822 errout:
1823         return err;
1824 }
1825
1826 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1827                                      struct netlink_callback *cb)
1828 {
1829         struct net *net = sock_net(skb->sk);
1830         int h, s_h;
1831         int idx, s_idx;
1832         struct net_device *dev;
1833         struct in_device *in_dev;
1834         struct hlist_head *head;
1835
1836         s_h = cb->args[0];
1837         s_idx = idx = cb->args[1];
1838
1839         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1840                 idx = 0;
1841                 head = &net->dev_index_head[h];
1842                 rcu_read_lock();
1843                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1844                           net->dev_base_seq;
1845                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1846                         if (idx < s_idx)
1847                                 goto cont;
1848                         in_dev = __in_dev_get_rcu(dev);
1849                         if (!in_dev)
1850                                 goto cont;
1851
1852                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1853                                                       &in_dev->cnf,
1854                                                       NETLINK_CB(cb->skb).portid,
1855                                                       cb->nlh->nlmsg_seq,
1856                                                       RTM_NEWNETCONF,
1857                                                       NLM_F_MULTI,
1858                                                       -1) <= 0) {
1859                                 rcu_read_unlock();
1860                                 goto done;
1861                         }
1862                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1863 cont:
1864                         idx++;
1865                 }
1866                 rcu_read_unlock();
1867         }
1868         if (h == NETDEV_HASHENTRIES) {
1869                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1870                                               net->ipv4.devconf_all,
1871                                               NETLINK_CB(cb->skb).portid,
1872                                               cb->nlh->nlmsg_seq,
1873                                               RTM_NEWNETCONF, NLM_F_MULTI,
1874                                               -1) <= 0)
1875                         goto done;
1876                 else
1877                         h++;
1878         }
1879         if (h == NETDEV_HASHENTRIES + 1) {
1880                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1881                                               net->ipv4.devconf_dflt,
1882                                               NETLINK_CB(cb->skb).portid,
1883                                               cb->nlh->nlmsg_seq,
1884                                               RTM_NEWNETCONF, NLM_F_MULTI,
1885                                               -1) <= 0)
1886                         goto done;
1887                 else
1888                         h++;
1889         }
1890 done:
1891         cb->args[0] = h;
1892         cb->args[1] = idx;
1893
1894         return skb->len;
1895 }
1896
1897 #ifdef CONFIG_SYSCTL
1898
1899 static void devinet_copy_dflt_conf(struct net *net, int i)
1900 {
1901         struct net_device *dev;
1902
1903         rcu_read_lock();
1904         for_each_netdev_rcu(net, dev) {
1905                 struct in_device *in_dev;
1906
1907                 in_dev = __in_dev_get_rcu(dev);
1908                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1909                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1910         }
1911         rcu_read_unlock();
1912 }
1913
1914 /* called with RTNL locked */
1915 static void inet_forward_change(struct net *net)
1916 {
1917         struct net_device *dev;
1918         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1919
1920         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1921         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1922         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1923                                     NETCONFA_IFINDEX_ALL,
1924                                     net->ipv4.devconf_all);
1925         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1926                                     NETCONFA_IFINDEX_DEFAULT,
1927                                     net->ipv4.devconf_dflt);
1928
1929         for_each_netdev(net, dev) {
1930                 struct in_device *in_dev;
1931                 if (on)
1932                         dev_disable_lro(dev);
1933                 rcu_read_lock();
1934                 in_dev = __in_dev_get_rcu(dev);
1935                 if (in_dev) {
1936                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1937                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1938                                                     dev->ifindex, &in_dev->cnf);
1939                 }
1940                 rcu_read_unlock();
1941         }
1942 }
1943
1944 static int devinet_conf_proc(ctl_table *ctl, int write,
1945                              void __user *buffer,
1946                              size_t *lenp, loff_t *ppos)
1947 {
1948         int old_value = *(int *)ctl->data;
1949         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1950         int new_value = *(int *)ctl->data;
1951
1952         if (write) {
1953                 struct ipv4_devconf *cnf = ctl->extra1;
1954                 struct net *net = ctl->extra2;
1955                 int i = (int *)ctl->data - cnf->data;
1956
1957                 set_bit(i, cnf->state);
1958
1959                 if (cnf == net->ipv4.devconf_dflt)
1960                         devinet_copy_dflt_conf(net, i);
1961                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1962                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1963                         if ((new_value == 0) && (old_value != 0))
1964                                 rt_cache_flush(net);
1965                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1966                     new_value != old_value) {
1967                         int ifindex;
1968
1969                         if (cnf == net->ipv4.devconf_dflt)
1970                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1971                         else if (cnf == net->ipv4.devconf_all)
1972                                 ifindex = NETCONFA_IFINDEX_ALL;
1973                         else {
1974                                 struct in_device *idev =
1975                                         container_of(cnf, struct in_device,
1976                                                      cnf);
1977                                 ifindex = idev->dev->ifindex;
1978                         }
1979                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1980                                                     ifindex, cnf);
1981                 }
1982         }
1983
1984         return ret;
1985 }
1986
1987 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1988                                   void __user *buffer,
1989                                   size_t *lenp, loff_t *ppos)
1990 {
1991         int *valp = ctl->data;
1992         int val = *valp;
1993         loff_t pos = *ppos;
1994         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1995
1996         if (write && *valp != val) {
1997                 struct net *net = ctl->extra2;
1998
1999                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2000                         if (!rtnl_trylock()) {
2001                                 /* Restore the original values before restarting */
2002                                 *valp = val;
2003                                 *ppos = pos;
2004                                 return restart_syscall();
2005                         }
2006                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2007                                 inet_forward_change(net);
2008                         } else {
2009                                 struct ipv4_devconf *cnf = ctl->extra1;
2010                                 struct in_device *idev =
2011                                         container_of(cnf, struct in_device, cnf);
2012                                 if (*valp)
2013                                         dev_disable_lro(idev->dev);
2014                                 inet_netconf_notify_devconf(net,
2015                                                             NETCONFA_FORWARDING,
2016                                                             idev->dev->ifindex,
2017                                                             cnf);
2018                         }
2019                         rtnl_unlock();
2020                         rt_cache_flush(net);
2021                 } else
2022                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2023                                                     NETCONFA_IFINDEX_DEFAULT,
2024                                                     net->ipv4.devconf_dflt);
2025         }
2026
2027         return ret;
2028 }
2029
2030 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2031                                 void __user *buffer,
2032                                 size_t *lenp, loff_t *ppos)
2033 {
2034         int *valp = ctl->data;
2035         int val = *valp;
2036         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2037         struct net *net = ctl->extra2;
2038
2039         if (write && *valp != val)
2040                 rt_cache_flush(net);
2041
2042         return ret;
2043 }
2044
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf
 * and .extra1 at ipv4_devconf itself; @name is the procname, @mval the file
 * mode and @proc the handler.
 */
2045 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2046         { \
2047                 .procname       = name, \
2048                 .data           = ipv4_devconf.data + \
2049                                   IPV4_DEVCONF_ ## attr - 1, \
2050                 .maxlen         = sizeof(int), \
2051                 .mode           = mval, \
2052                 .proc_handler   = proc, \
2053                 .extra1         = &ipv4_devconf, \
2054         }
2055
/* Mode 0644 entry handled by devinet_conf_proc. */
2056 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2057         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2058
/* Mode 0444 entry handled by devinet_conf_proc. */
2059 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2060         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2061
/* Mode 0644 entry with a caller-supplied handler. */
2062 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2063         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2064
/* Mode 0644 entry handled by ipv4_doint_and_flush. */
2065 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2066         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2067
/*
 * Template for the per-configuration sysctl directory.  Every entry here
 * refers to the global ipv4_devconf; __devinet_sysctl_register() duplicates
 * the whole structure and rebases .data/.extra1/.extra2 onto the target
 * configuration before registering it.
 * NOTE(review): the array is sized __IPV4_DEVCONF_MAX and the register loop
 * stops one short of the end, presumably leaving a zeroed terminator entry.
 */
2068 static struct devinet_sysctl_table {
2069         struct ctl_table_header *sysctl_header;
2070         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2071 } devinet_sysctl = {
2072         .devinet_vars = {
2073                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2074                                              devinet_sysctl_forward),
2075                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2076
2077                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2078                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2079                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2080                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2081                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2082                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2083                                         "accept_source_route"),
2084                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2085                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2086                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2087                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2088                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2089                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2090                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2091                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2092                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2093                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2094                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2095                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2096                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2097
                 /* entries below use ipv4_doint_and_flush: a changed value flushes the route cache */
2098                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2099                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2100                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
2101                                               "force_igmp_version"),
2102                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2103                                               "promote_secondaries"),
2104                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2105                                               "route_localnet"),
2106         },
2107 };
2108
2109 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2110                                         struct ipv4_devconf *p)
2111 {
2112         int i;
2113         struct devinet_sysctl_table *t;
2114         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2115
2116         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2117         if (!t)
2118                 goto out;
2119
2120         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2121                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2122                 t->devinet_vars[i].extra1 = p;
2123                 t->devinet_vars[i].extra2 = net;
2124         }
2125
2126         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2127
2128         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2129         if (!t->sysctl_header)
2130                 goto free;
2131
2132         p->sysctl = t;
2133         return 0;
2134
2135 free:
2136         kfree(t);
2137 out:
2138         return -ENOBUFS;
2139 }
2140
2141 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2142 {
2143         struct devinet_sysctl_table *t = cnf->sysctl;
2144
2145         if (t == NULL)
2146                 return;
2147
2148         cnf->sysctl = NULL;
2149         unregister_net_sysctl_table(t->sysctl_header);
2150         kfree(t);
2151 }
2152
2153 static void devinet_sysctl_register(struct in_device *idev)
2154 {
2155         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2156         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2157                                         &idev->cnf);
2158 }
2159
2160 static void devinet_sysctl_unregister(struct in_device *idev)
2161 {
2162         __devinet_sysctl_unregister(&idev->cnf);
2163         neigh_sysctl_unregister(idev->arp_parms);
2164 }
2165
/*
 * "ip_forward" sysctl table, registered under "net/ipv4" by
 * devinet_init_net().  This static instance refers to the global
 * ipv4_devconf and init_net; other namespaces get a kmemdup()ed copy with
 * .data/.extra1/.extra2 repointed.
 */
2166 static struct ctl_table ctl_forward_entry[] = {
2167         {
2168                 .procname       = "ip_forward",
2169                 .data           = &ipv4_devconf.data[
2170                                         IPV4_DEVCONF_FORWARDING - 1],
2171                 .maxlen         = sizeof(int),
2172                 .mode           = 0644,
2173                 .proc_handler   = devinet_sysctl_forward,
2174                 .extra1         = &ipv4_devconf,
2175                 .extra2         = &init_net,
2176         },
2177         { },
2178 };
2179 #endif
2180
/*
 * Per-namespace setup of the "all" and "default" IPv4 devconf blocks.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt objects (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table); every other
 * namespace gets kmemdup()ed copies.  With CONFIG_SYSCTL the conf/all and
 * conf/default directories and the net/ipv4 ip_forward entry are registered
 * as well.
 *
 * Returns 0 on success or a negative errno; the error labels release what
 * was set up so far, in reverse order.
 */
2181 static __net_init int devinet_init_net(struct net *net)
2182 {
2183         int err;
2184         struct ipv4_devconf *all, *dflt;
2185 #ifdef CONFIG_SYSCTL
2186         struct ctl_table *tbl = ctl_forward_entry;
2187         struct ctl_table_header *forw_hdr;
2188 #endif
2189
2190         err = -ENOMEM;
2191         all = &ipv4_devconf;
2192         dflt = &ipv4_devconf_dflt;
2193
         /* namespaces other than init_net work on private copies */
2194         if (!net_eq(net, &init_net)) {
2195                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2196                 if (all == NULL)
2197                         goto err_alloc_all;
2198
2199                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2200                 if (dflt == NULL)
2201                         goto err_alloc_dflt;
2202
2203 #ifdef CONFIG_SYSCTL
2204                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2205                 if (tbl == NULL)
2206                         goto err_alloc_ctl;
2207
                 /* repoint the copied ip_forward entry at this namespace's "all" block */
2208                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2209                 tbl[0].extra1 = all;
2210                 tbl[0].extra2 = net;
2211 #endif
2212         }
2213
2214 #ifdef CONFIG_SYSCTL
2215         err = __devinet_sysctl_register(net, "all", all);
2216         if (err < 0)
2217                 goto err_reg_all;
2218
2219         err = __devinet_sysctl_register(net, "default", dflt);
2220         if (err < 0)
2221                 goto err_reg_dflt;
2222
2223         err = -ENOMEM;
2224         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2225         if (forw_hdr == NULL)
2226                 goto err_reg_ctl;
2227         net->ipv4.forw_hdr = forw_hdr;
2228 #endif
2229
2230         net->ipv4.devconf_all = all;
2231         net->ipv4.devconf_dflt = dflt;
2232         return 0;
2233
2234 #ifdef CONFIG_SYSCTL
2235 err_reg_ctl:
2236         __devinet_sysctl_unregister(dflt);
2237 err_reg_dflt:
2238         __devinet_sysctl_unregister(all);
2239 err_reg_all:
         /* only free the table if it is a copy, never the static one */
2240         if (tbl != ctl_forward_entry)
2241                 kfree(tbl);
2242 err_alloc_ctl:
2243 #endif
2244         if (dflt != &ipv4_devconf_dflt)
2245                 kfree(dflt);
2246 err_alloc_dflt:
2247         if (all != &ipv4_devconf)
2248                 kfree(all);
2249 err_alloc_all:
2250         return err;
2251 }
2252
2253 static __net_exit void devinet_exit_net(struct net *net)
2254 {
2255 #ifdef CONFIG_SYSCTL
2256         struct ctl_table *tbl;
2257
2258         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2259         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2260         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2261         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2262         kfree(tbl);
2263 #endif
2264         kfree(net->ipv4.devconf_dflt);
2265         kfree(net->ipv4.devconf_all);
2266 }
2267
/* Per-namespace init/exit hooks; registered from devinet_init(). */
2268 static __net_initdata struct pernet_operations devinet_ops = {
2269         .init = devinet_init_net,
2270         .exit = devinet_exit_net,
2271 };
2272
/*
 * AF_INET callbacks for the rtnetlink per-address-family link attributes;
 * registered from devinet_init() via rtnl_af_register().
 */
2273 static struct rtnl_af_ops inet_af_ops = {
2274         .family           = AF_INET,
2275         .fill_link_af     = inet_fill_link_af,
2276         .get_link_af_size = inet_get_link_af_size,
2277         .validate_link_af = inet_validate_link_af,
2278         .set_link_af      = inet_set_link_af,
2279 };
2280
2281 void __init devinet_init(void)
2282 {
2283         int i;
2284
2285         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2286                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2287
2288         register_pernet_subsys(&devinet_ops);
2289
2290         register_gifconf(PF_INET, inet_gifconf);
2291         register_netdevice_notifier(&ip_netdev_notifier);
2292
2293         schedule_delayed_work(&check_lifetime_work, 0);
2294
2295         rtnl_af_register(&inet_af_ops);
2296
2297         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2298         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2299         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2300         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2301                       inet_netconf_dump_devconf, NULL);
2302 }
2303