Merge tag 'iwlwifi-next-for-kalle-2014-12-30' of https://git.kernel.org/pub/scm/linux...
[cascardo/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Access devconf attribute @attr in the default configuration set of
 * namespace @net.  @net is parenthesised so that any pointer expression
 * may be passed; @attr is handed on untouched to IPV4_DEVCONF().
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109
110 static u32 inet_addr_hash(struct net *net, __be32 addr)
111 {
112         u32 val = (__force u32) addr ^ net_hash_mix(net);
113
114         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
115 }
116
117 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
118 {
119         u32 hash = inet_addr_hash(net, ifa->ifa_local);
120
121         ASSERT_RTNL();
122         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
123 }
124
125 static void inet_hash_remove(struct in_ifaddr *ifa)
126 {
127         ASSERT_RTNL();
128         hlist_del_init_rcu(&ifa->hash);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         u32 hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
147                 if (ifa->ifa_local == addr) {
148                         struct net_device *dev = ifa->ifa_dev->dev;
149
150                         if (!net_eq(dev_net(dev), net))
151                                 continue;
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
182 #ifdef CONFIG_SYSCTL
183 static int devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static int devinet_sysctl_register(struct in_device *idev)
187 {
188         return 0;
189 }
190 static void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221         kfree(rcu_dereference_protected(idev->mc_hash, 1));
222 #ifdef NET_REFCNT_DEBUG
223         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236         int err = -ENOMEM;
237
238         ASSERT_RTNL();
239
240         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
241         if (!in_dev)
242                 goto out;
243         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
244                         sizeof(in_dev->cnf));
245         in_dev->cnf.sysctl = NULL;
246         in_dev->dev = dev;
247         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
248         if (!in_dev->arp_parms)
249                 goto out_kfree;
250         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
251                 dev_disable_lro(dev);
252         /* Reference in_dev->dev */
253         dev_hold(dev);
254         /* Account for reference dev->ip_ptr (below) */
255         in_dev_hold(in_dev);
256
257         err = devinet_sysctl_register(in_dev);
258         if (err) {
259                 in_dev->dead = 1;
260                 in_dev_put(in_dev);
261                 in_dev = NULL;
262                 goto out;
263         }
264         ip_mc_init_dev(in_dev);
265         if (dev->flags & IFF_UP)
266                 ip_mc_up(in_dev);
267
268         /* we can receive as soon as ip_ptr is set -- do this last */
269         rcu_assign_pointer(dev->ip_ptr, in_dev);
270 out:
271         return in_dev ?: ERR_PTR(err);
272 out_kfree:
273         kfree(in_dev);
274         in_dev = NULL;
275         goto out;
276 }
277
278 static void in_dev_rcu_put(struct rcu_head *head)
279 {
280         struct in_device *idev = container_of(head, struct in_device, rcu_head);
281         in_dev_put(idev);
282 }
283
284 static void inetdev_destroy(struct in_device *in_dev)
285 {
286         struct in_ifaddr *ifa;
287         struct net_device *dev;
288
289         ASSERT_RTNL();
290
291         dev = in_dev->dev;
292
293         in_dev->dead = 1;
294
295         ip_mc_destroy_dev(in_dev);
296
297         while ((ifa = in_dev->ifa_list) != NULL) {
298                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
299                 inet_free_ifa(ifa);
300         }
301
302         RCU_INIT_POINTER(dev->ip_ptr, NULL);
303
304         devinet_sysctl_unregister(in_dev);
305         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
306         arp_ifdown(dev);
307
308         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
309 }
310
311 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
312 {
313         rcu_read_lock();
314         for_primary_ifa(in_dev) {
315                 if (inet_ifa_match(a, ifa)) {
316                         if (!b || inet_ifa_match(b, ifa)) {
317                                 rcu_read_unlock();
318                                 return 1;
319                         }
320                 }
321         } endfor_ifa(in_dev);
322         rcu_read_unlock();
323         return 0;
324 }
325
326 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy, struct nlmsghdr *nlh, u32 portid)
328 {
329         struct in_ifaddr *promote = NULL;
330         struct in_ifaddr *ifa, *ifa1 = *ifap;
331         struct in_ifaddr *last_prim = in_dev->ifa_list;
332         struct in_ifaddr *prev_prom = NULL;
333         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
334
335         ASSERT_RTNL();
336
337         /* 1. Deleting primary ifaddr forces deletion all secondaries
338          * unless alias promotion is set
339          **/
340
341         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
342                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
343
344                 while ((ifa = *ifap1) != NULL) {
345                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
346                             ifa1->ifa_scope <= ifa->ifa_scope)
347                                 last_prim = ifa;
348
349                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
350                             ifa1->ifa_mask != ifa->ifa_mask ||
351                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
352                                 ifap1 = &ifa->ifa_next;
353                                 prev_prom = ifa;
354                                 continue;
355                         }
356
357                         if (!do_promote) {
358                                 inet_hash_remove(ifa);
359                                 *ifap1 = ifa->ifa_next;
360
361                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
362                                 blocking_notifier_call_chain(&inetaddr_chain,
363                                                 NETDEV_DOWN, ifa);
364                                 inet_free_ifa(ifa);
365                         } else {
366                                 promote = ifa;
367                                 break;
368                         }
369                 }
370         }
371
372         /* On promotion all secondaries from subnet are changing
373          * the primary IP, we must remove all their routes silently
374          * and later to add them back with new prefsrc. Do this
375          * while all addresses are on the device list.
376          */
377         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
378                 if (ifa1->ifa_mask == ifa->ifa_mask &&
379                     inet_ifa_match(ifa1->ifa_address, ifa))
380                         fib_del_ifaddr(ifa, ifa1);
381         }
382
383         /* 2. Unlink it */
384
385         *ifap = ifa1->ifa_next;
386         inet_hash_remove(ifa1);
387
388         /* 3. Announce address deletion */
389
390         /* Send message first, then call notifier.
391            At first sight, FIB update triggered by notifier
392            will refer to already deleted ifaddr, that could confuse
393            netlink listeners. It is not true: look, gated sees
394            that route deleted and if it still thinks that ifaddr
395            is valid, it will try to restore deleted routes... Grr.
396            So that, this order is correct.
397          */
398         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
399         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
400
401         if (promote) {
402                 struct in_ifaddr *next_sec = promote->ifa_next;
403
404                 if (prev_prom) {
405                         prev_prom->ifa_next = promote->ifa_next;
406                         promote->ifa_next = last_prim->ifa_next;
407                         last_prim->ifa_next = promote;
408                 }
409
410                 promote->ifa_flags &= ~IFA_F_SECONDARY;
411                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
412                 blocking_notifier_call_chain(&inetaddr_chain,
413                                 NETDEV_UP, promote);
414                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
415                         if (ifa1->ifa_mask != ifa->ifa_mask ||
416                             !inet_ifa_match(ifa1->ifa_address, ifa))
417                                         continue;
418                         fib_add_ifaddr(ifa);
419                 }
420
421         }
422         if (destroy)
423                 inet_free_ifa(ifa1);
424 }
425
426 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
427                          int destroy)
428 {
429         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
430 }
431
/* Delayed work item that runs check_lifetime(). */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
435
436 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
437                              u32 portid)
438 {
439         struct in_device *in_dev = ifa->ifa_dev;
440         struct in_ifaddr *ifa1, **ifap, **last_primary;
441
442         ASSERT_RTNL();
443
444         if (!ifa->ifa_local) {
445                 inet_free_ifa(ifa);
446                 return 0;
447         }
448
449         ifa->ifa_flags &= ~IFA_F_SECONDARY;
450         last_primary = &in_dev->ifa_list;
451
452         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
453              ifap = &ifa1->ifa_next) {
454                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
455                     ifa->ifa_scope <= ifa1->ifa_scope)
456                         last_primary = &ifa1->ifa_next;
457                 if (ifa1->ifa_mask == ifa->ifa_mask &&
458                     inet_ifa_match(ifa1->ifa_address, ifa)) {
459                         if (ifa1->ifa_local == ifa->ifa_local) {
460                                 inet_free_ifa(ifa);
461                                 return -EEXIST;
462                         }
463                         if (ifa1->ifa_scope != ifa->ifa_scope) {
464                                 inet_free_ifa(ifa);
465                                 return -EINVAL;
466                         }
467                         ifa->ifa_flags |= IFA_F_SECONDARY;
468                 }
469         }
470
471         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
472                 prandom_seed((__force u32) ifa->ifa_local);
473                 ifap = last_primary;
474         }
475
476         ifa->ifa_next = *ifap;
477         *ifap = ifa;
478
479         inet_hash_insert(dev_net(in_dev->dev), ifa);
480
481         cancel_delayed_work(&check_lifetime_work);
482         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
483
484         /* Send message first, then call notifier.
485            Notifier will trigger FIB update, so that
486            listeners of netlink will know about new ifaddr */
487         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
488         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
489
490         return 0;
491 }
492
493 static int inet_insert_ifa(struct in_ifaddr *ifa)
494 {
495         return __inet_insert_ifa(ifa, NULL, 0);
496 }
497
498 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
499 {
500         struct in_device *in_dev = __in_dev_get_rtnl(dev);
501
502         ASSERT_RTNL();
503
504         if (!in_dev) {
505                 inet_free_ifa(ifa);
506                 return -ENOBUFS;
507         }
508         ipv4_devconf_setall(in_dev);
509         neigh_parms_data_state_setall(in_dev->arp_parms);
510         if (ifa->ifa_dev != in_dev) {
511                 WARN_ON(ifa->ifa_dev);
512                 in_dev_hold(in_dev);
513                 ifa->ifa_dev = in_dev;
514         }
515         if (ipv4_is_loopback(ifa->ifa_local))
516                 ifa->ifa_scope = RT_SCOPE_HOST;
517         return inet_insert_ifa(ifa);
518 }
519
520 /* Caller must hold RCU or RTNL :
521  * We dont take a reference on found in_device
522  */
523 struct in_device *inetdev_by_index(struct net *net, int ifindex)
524 {
525         struct net_device *dev;
526         struct in_device *in_dev = NULL;
527
528         rcu_read_lock();
529         dev = dev_get_by_index_rcu(net, ifindex);
530         if (dev)
531                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
532         rcu_read_unlock();
533         return in_dev;
534 }
535 EXPORT_SYMBOL(inetdev_by_index);
536
537 /* Called only from RTNL semaphored context. No locks. */
538
539 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
540                                     __be32 mask)
541 {
542         ASSERT_RTNL();
543
544         for_primary_ifa(in_dev) {
545                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
546                         return ifa;
547         } endfor_ifa(in_dev);
548         return NULL;
549 }
550
551 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
552 {
553         struct net *net = sock_net(skb->sk);
554         struct nlattr *tb[IFA_MAX+1];
555         struct in_device *in_dev;
556         struct ifaddrmsg *ifm;
557         struct in_ifaddr *ifa, **ifap;
558         int err = -EINVAL;
559
560         ASSERT_RTNL();
561
562         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
563         if (err < 0)
564                 goto errout;
565
566         ifm = nlmsg_data(nlh);
567         in_dev = inetdev_by_index(net, ifm->ifa_index);
568         if (in_dev == NULL) {
569                 err = -ENODEV;
570                 goto errout;
571         }
572
573         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
574              ifap = &ifa->ifa_next) {
575                 if (tb[IFA_LOCAL] &&
576                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
577                         continue;
578
579                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
580                         continue;
581
582                 if (tb[IFA_ADDRESS] &&
583                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
584                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
585                         continue;
586
587                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
588                 return 0;
589         }
590
591         err = -EADDRNOTAVAIL;
592 errout:
593         return err;
594 }
595
596 #define INFINITY_LIFE_TIME      0xFFFFFFFF
597
598 static void check_lifetime(struct work_struct *work)
599 {
600         unsigned long now, next, next_sec, next_sched;
601         struct in_ifaddr *ifa;
602         struct hlist_node *n;
603         int i;
604
605         now = jiffies;
606         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
607
608         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
609                 bool change_needed = false;
610
611                 rcu_read_lock();
612                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
613                         unsigned long age;
614
615                         if (ifa->ifa_flags & IFA_F_PERMANENT)
616                                 continue;
617
618                         /* We try to batch several events at once. */
619                         age = (now - ifa->ifa_tstamp +
620                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
621
622                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
623                             age >= ifa->ifa_valid_lft) {
624                                 change_needed = true;
625                         } else if (ifa->ifa_preferred_lft ==
626                                    INFINITY_LIFE_TIME) {
627                                 continue;
628                         } else if (age >= ifa->ifa_preferred_lft) {
629                                 if (time_before(ifa->ifa_tstamp +
630                                                 ifa->ifa_valid_lft * HZ, next))
631                                         next = ifa->ifa_tstamp +
632                                                ifa->ifa_valid_lft * HZ;
633
634                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
635                                         change_needed = true;
636                         } else if (time_before(ifa->ifa_tstamp +
637                                                ifa->ifa_preferred_lft * HZ,
638                                                next)) {
639                                 next = ifa->ifa_tstamp +
640                                        ifa->ifa_preferred_lft * HZ;
641                         }
642                 }
643                 rcu_read_unlock();
644                 if (!change_needed)
645                         continue;
646                 rtnl_lock();
647                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
648                         unsigned long age;
649
650                         if (ifa->ifa_flags & IFA_F_PERMANENT)
651                                 continue;
652
653                         /* We try to batch several events at once. */
654                         age = (now - ifa->ifa_tstamp +
655                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
656
657                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
658                             age >= ifa->ifa_valid_lft) {
659                                 struct in_ifaddr **ifap;
660
661                                 for (ifap = &ifa->ifa_dev->ifa_list;
662                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
663                                         if (*ifap == ifa) {
664                                                 inet_del_ifa(ifa->ifa_dev,
665                                                              ifap, 1);
666                                                 break;
667                                         }
668                                 }
669                         } else if (ifa->ifa_preferred_lft !=
670                                    INFINITY_LIFE_TIME &&
671                                    age >= ifa->ifa_preferred_lft &&
672                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
673                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
674                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
675                         }
676                 }
677                 rtnl_unlock();
678         }
679
680         next_sec = round_jiffies_up(next);
681         next_sched = next;
682
683         /* If rounded timeout is accurate enough, accept it. */
684         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
685                 next_sched = next_sec;
686
687         now = jiffies;
688         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
689         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
690                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
691
692         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
693                         next_sched - now);
694 }
695
696 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
697                              __u32 prefered_lft)
698 {
699         unsigned long timeout;
700
701         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
702
703         timeout = addrconf_timeout_fixup(valid_lft, HZ);
704         if (addrconf_finite_timeout(timeout))
705                 ifa->ifa_valid_lft = timeout;
706         else
707                 ifa->ifa_flags |= IFA_F_PERMANENT;
708
709         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
710         if (addrconf_finite_timeout(timeout)) {
711                 if (timeout == 0)
712                         ifa->ifa_flags |= IFA_F_DEPRECATED;
713                 ifa->ifa_preferred_lft = timeout;
714         }
715         ifa->ifa_tstamp = jiffies;
716         if (!ifa->ifa_cstamp)
717                 ifa->ifa_cstamp = ifa->ifa_tstamp;
718 }
719
720 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
721                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
722 {
723         struct nlattr *tb[IFA_MAX+1];
724         struct in_ifaddr *ifa;
725         struct ifaddrmsg *ifm;
726         struct net_device *dev;
727         struct in_device *in_dev;
728         int err;
729
730         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
731         if (err < 0)
732                 goto errout;
733
734         ifm = nlmsg_data(nlh);
735         err = -EINVAL;
736         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
737                 goto errout;
738
739         dev = __dev_get_by_index(net, ifm->ifa_index);
740         err = -ENODEV;
741         if (dev == NULL)
742                 goto errout;
743
744         in_dev = __in_dev_get_rtnl(dev);
745         err = -ENOBUFS;
746         if (in_dev == NULL)
747                 goto errout;
748
749         ifa = inet_alloc_ifa();
750         if (ifa == NULL)
751                 /*
752                  * A potential indev allocation can be left alive, it stays
753                  * assigned to its device and is destroy with it.
754                  */
755                 goto errout;
756
757         ipv4_devconf_setall(in_dev);
758         neigh_parms_data_state_setall(in_dev->arp_parms);
759         in_dev_hold(in_dev);
760
761         if (tb[IFA_ADDRESS] == NULL)
762                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
763
764         INIT_HLIST_NODE(&ifa->hash);
765         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
766         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
767         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
768                                          ifm->ifa_flags;
769         ifa->ifa_scope = ifm->ifa_scope;
770         ifa->ifa_dev = in_dev;
771
772         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
773         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
774
775         if (tb[IFA_BROADCAST])
776                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
777
778         if (tb[IFA_LABEL])
779                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
780         else
781                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
782
783         if (tb[IFA_CACHEINFO]) {
784                 struct ifa_cacheinfo *ci;
785
786                 ci = nla_data(tb[IFA_CACHEINFO]);
787                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
788                         err = -EINVAL;
789                         goto errout_free;
790                 }
791                 *pvalid_lft = ci->ifa_valid;
792                 *pprefered_lft = ci->ifa_prefered;
793         }
794
795         return ifa;
796
797 errout_free:
798         inet_free_ifa(ifa);
799 errout:
800         return ERR_PTR(err);
801 }
802
803 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
804 {
805         struct in_device *in_dev = ifa->ifa_dev;
806         struct in_ifaddr *ifa1, **ifap;
807
808         if (!ifa->ifa_local)
809                 return NULL;
810
811         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
812              ifap = &ifa1->ifa_next) {
813                 if (ifa1->ifa_mask == ifa->ifa_mask &&
814                     inet_ifa_match(ifa1->ifa_address, ifa) &&
815                     ifa1->ifa_local == ifa->ifa_local)
816                         return ifa1;
817         }
818         return NULL;
819 }
820
821 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
822 {
823         struct net *net = sock_net(skb->sk);
824         struct in_ifaddr *ifa;
825         struct in_ifaddr *ifa_existing;
826         __u32 valid_lft = INFINITY_LIFE_TIME;
827         __u32 prefered_lft = INFINITY_LIFE_TIME;
828
829         ASSERT_RTNL();
830
831         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
832         if (IS_ERR(ifa))
833                 return PTR_ERR(ifa);
834
835         ifa_existing = find_matching_ifa(ifa);
836         if (!ifa_existing) {
837                 /* It would be best to check for !NLM_F_CREATE here but
838                  * userspace already relies on not having to provide this.
839                  */
840                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
841                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
842         } else {
843                 inet_free_ifa(ifa);
844
845                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
846                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
847                         return -EEXIST;
848                 ifa = ifa_existing;
849                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
850                 cancel_delayed_work(&check_lifetime_work);
851                 queue_delayed_work(system_power_efficient_wq,
852                                 &check_lifetime_work, 0);
853                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
854                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
855         }
856         return 0;
857 }
858
859 /*
860  *      Determine a default network mask, based on the IP address.
861  */
862
863 static int inet_abc_len(__be32 addr)
864 {
865         int rc = -1;    /* Something else, probably a multicast. */
866
867         if (ipv4_is_zeronet(addr))
868                 rc = 0;
869         else {
870                 __u32 haddr = ntohl(addr);
871
872                 if (IN_CLASSA(haddr))
873                         rc = 8;
874                 else if (IN_CLASSB(haddr))
875                         rc = 16;
876                 else if (IN_CLASSC(haddr))
877                         rc = 24;
878         }
879
880         return rc;
881 }
882
883
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls
 * @net: network namespace used for the device lookup and capability check
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK
 * @arg: user pointer to a struct ifreq; read on entry and, for the
 *       SIOCGIF* commands, written back with the requested address
 *
 * The interface name may carry an alias suffix ("name:label").  The suffix
 * is cut off while the device is loaded and looked up, then restored so
 * that the full name can be compared against ifa_label.
 *
 * Returns 0 on success or a negative errno: -EFAULT when the ifreq cannot
 * be copied, -EPERM when a set command lacks CAP_NET_ADMIN, -EINVAL for an
 * unknown command or an unacceptable address/mask, -ENODEV when the device
 * does not exist, -EADDRNOTAVAIL when no matching address is configured
 * (SIOCSIFADDR and non-alias SIOCSIFFLAGS do not need one), -ENOBUFS when
 * a new address cannot be allocated, or whatever dev_change_flags() /
 * inet_set_ifa() report.
 */
884 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
885 {
886         struct ifreq ifr;
887         struct sockaddr_in sin_orig;
888         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
889         struct in_device *in_dev;
890         struct in_ifaddr **ifap = NULL;
891         struct in_ifaddr *ifa = NULL;
892         struct net_device *dev;
893         char *colon;
894         int ret = -EFAULT;
895         int tryaddrmatch = 0;
896
897         /*
898          *      Fetch the caller's info block into kernel space
899          */
900
901         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
902                 goto out;
        /* Force termination: the name came from user space. */
903         ifr.ifr_name[IFNAMSIZ - 1] = 0;
904
905         /* save original address for comparison */
906         memcpy(&sin_orig, sin, sizeof(*sin));
907
        /* Temporarily cut the alias suffix so only the device name remains. */
908         colon = strchr(ifr.ifr_name, ':');
909         if (colon)
910                 *colon = 0;
911
912         dev_load(net, ifr.ifr_name);
913
        /* First pass over cmd: permission and argument checks, done before
         * the RTNL lock is taken.
         */
914         switch (cmd) {
915         case SIOCGIFADDR:       /* Get interface address */
916         case SIOCGIFBRDADDR:    /* Get the broadcast address */
917         case SIOCGIFDSTADDR:    /* Get the destination address */
918         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
919                 /* Note that these ioctls will not sleep,
920                    so that we do not impose a lock.
921                    One day we will be forced to put shlock here (I mean SMP)
922                  */
923                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
924                 memset(sin, 0, sizeof(*sin));
925                 sin->sin_family = AF_INET;
926                 break;
927
928         case SIOCSIFFLAGS:
929                 ret = -EPERM;
930                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
931                         goto out;
932                 break;
933         case SIOCSIFADDR:       /* Set interface address (and family) */
934         case SIOCSIFBRDADDR:    /* Set the broadcast address */
935         case SIOCSIFDSTADDR:    /* Set the destination address */
936         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
937                 ret = -EPERM;
938                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
939                         goto out;
940                 ret = -EINVAL;
941                 if (sin->sin_family != AF_INET)
942                         goto out;
943                 break;
944         default:
945                 ret = -EINVAL;
946                 goto out;
947         }
948
949         rtnl_lock();
950
951         ret = -ENODEV;
952         dev = __dev_get_by_name(net, ifr.ifr_name);
953         if (!dev)
954                 goto done;
955
        /* Put the alias suffix back: labels are compared with the full name. */
956         if (colon)
957                 *colon = ':';
958
959         in_dev = __in_dev_get_rtnl(dev);
960         if (in_dev) {
961                 if (tryaddrmatch) {
962                         /* Matthias Andree */
963                         /* compare label and address (4.4BSD style) */
964                         /* note: we only do this for a limited set of ioctls
965                            and only if the original address family was AF_INET.
966                            This is checked above. */
967                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
968                              ifap = &ifa->ifa_next) {
969                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
970                                     sin_orig.sin_addr.s_addr ==
971                                                         ifa->ifa_local) {
972                                         break; /* found */
973                                 }
974                         }
975                 }
976                 /* we didn't get a match, maybe the application is
977                    4.3BSD-style and passed in junk so we fall back to
978                    comparing just the label */
979                 if (!ifa) {
980                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
981                              ifap = &ifa->ifa_next)
982                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
983                                         break;
984                 }
985         }
986
        /* Only SIOCSIFADDR and SIOCSIFFLAGS may proceed without an address;
         * every other command dereferences ifa below.
         */
987         ret = -EADDRNOTAVAIL;
988         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
989                 goto done;
990
        /* Second pass over cmd: perform the operation under RTNL. */
991         switch (cmd) {
992         case SIOCGIFADDR:       /* Get interface address */
993                 sin->sin_addr.s_addr = ifa->ifa_local;
994                 goto rarok;
995
996         case SIOCGIFBRDADDR:    /* Get the broadcast address */
997                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
998                 goto rarok;
999
1000         case SIOCGIFDSTADDR:    /* Get the destination address */
1001                 sin->sin_addr.s_addr = ifa->ifa_address;
1002                 goto rarok;
1003
1004         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1005                 sin->sin_addr.s_addr = ifa->ifa_mask;
1006                 goto rarok;
1007
1008         case SIOCSIFFLAGS:
                /* On an alias name the flags apply to that address only:
                 * clearing IFF_UP deletes it, anything else is a no-op.
                 */
1009                 if (colon) {
1010                         ret = -EADDRNOTAVAIL;
1011                         if (!ifa)
1012                                 break;
1013                         ret = 0;
1014                         if (!(ifr.ifr_flags & IFF_UP))
1015                                 inet_del_ifa(in_dev, ifap, 1);
1016                         break;
1017                 }
1018                 ret = dev_change_flags(dev, ifr.ifr_flags);
1019                 break;
1020
1021         case SIOCSIFADDR:       /* Set interface address (and family) */
1022                 ret = -EINVAL;
1023                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1024                         break;
1025
1026                 if (!ifa) {
1027                         ret = -ENOBUFS;
1028                         ifa = inet_alloc_ifa();
1029                         if (!ifa)
1030                                 break;
1031                         INIT_HLIST_NODE(&ifa->hash);
1032                         if (colon)
1033                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1034                         else
1035                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1036                 } else {
1037                         ret = 0;
1038                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1039                                 break;
                        /* Unlink the existing entry, reset the derived fields
                         * and reuse it for the new address below.
                         */
1040                         inet_del_ifa(in_dev, ifap, 0);
1041                         ifa->ifa_broadcast = 0;
1042                         ifa->ifa_scope = 0;
1043                 }
1044
1045                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1046
1047                 if (!(dev->flags & IFF_POINTOPOINT)) {
1048                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1049                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1050                         if ((dev->flags & IFF_BROADCAST) &&
1051                             ifa->ifa_prefixlen < 31)
1052                                 ifa->ifa_broadcast = ifa->ifa_address |
1053                                                      ~ifa->ifa_mask;
1054                 } else {
1055                         ifa->ifa_prefixlen = 32;
1056                         ifa->ifa_mask = inet_make_mask(32);
1057                 }
1058                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1059                 ret = inet_set_ifa(dev, ifa);
1060                 break;
1061
1062         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1063                 ret = 0;
1064                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1065                         inet_del_ifa(in_dev, ifap, 0);
1066                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1067                         inet_insert_ifa(ifa);
1068                 }
1069                 break;
1070
1071         case SIOCSIFDSTADDR:    /* Set the destination address */
1072                 ret = 0;
1073                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1074                         break;
1075                 ret = -EINVAL;
1076                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1077                         break;
1078                 ret = 0;
1079                 inet_del_ifa(in_dev, ifap, 0);
1080                 ifa->ifa_address = sin->sin_addr.s_addr;
1081                 inet_insert_ifa(ifa);
1082                 break;
1083
1084         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1085
1086                 /*
1087                  *      The mask we set must be legal.
1088                  */
1089                 ret = -EINVAL;
1090                 if (bad_mask(sin->sin_addr.s_addr, 0))
1091                         break;
1092                 ret = 0;
1093                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1094                         __be32 old_mask = ifa->ifa_mask;
1095                         inet_del_ifa(in_dev, ifap, 0);
1096                         ifa->ifa_mask = sin->sin_addr.s_addr;
1097                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1098
1099                         /* See if current broadcast address matches
1100                          * with current netmask, then recalculate
1101                          * the broadcast address. Otherwise it's a
1102                          * funny address, so don't touch it since
1103                          * the user seems to know what (s)he's doing...
1104                          */
1105                         if ((dev->flags & IFF_BROADCAST) &&
1106                             (ifa->ifa_prefixlen < 31) &&
1107                             (ifa->ifa_broadcast ==
1108                              (ifa->ifa_local|~old_mask))) {
1109                                 ifa->ifa_broadcast = (ifa->ifa_local |
1110                                                       ~sin->sin_addr.s_addr);
1111                         }
1112                         inet_insert_ifa(ifa);
1113                 }
1114                 break;
1115         }
1116 done:
1117         rtnl_unlock();
1118 out:
1119         return ret;
        /* Successful SIOCGIF*: drop RTNL first, then copy the ifreq back. */
1120 rarok:
1121         rtnl_unlock();
1122         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1123         goto out;
1124 }
1125
1126 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1127 {
1128         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1129         struct in_ifaddr *ifa;
1130         struct ifreq ifr;
1131         int done = 0;
1132
1133         if (!in_dev)
1134                 goto out;
1135
1136         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1137                 if (!buf) {
1138                         done += sizeof(ifr);
1139                         continue;
1140                 }
1141                 if (len < (int) sizeof(ifr))
1142                         break;
1143                 memset(&ifr, 0, sizeof(struct ifreq));
1144                 strcpy(ifr.ifr_name, ifa->ifa_label);
1145
1146                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1147                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1148                                                                 ifa->ifa_local;
1149
1150                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1151                         done = -EFAULT;
1152                         break;
1153                 }
1154                 buf  += sizeof(struct ifreq);
1155                 len  -= sizeof(struct ifreq);
1156                 done += sizeof(struct ifreq);
1157         }
1158 out:
1159         return done;
1160 }
1161
1162 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1163 {
1164         __be32 addr = 0;
1165         struct in_device *in_dev;
1166         struct net *net = dev_net(dev);
1167
1168         rcu_read_lock();
1169         in_dev = __in_dev_get_rcu(dev);
1170         if (!in_dev)
1171                 goto no_in_dev;
1172
1173         for_primary_ifa(in_dev) {
1174                 if (ifa->ifa_scope > scope)
1175                         continue;
1176                 if (!dst || inet_ifa_match(dst, ifa)) {
1177                         addr = ifa->ifa_local;
1178                         break;
1179                 }
1180                 if (!addr)
1181                         addr = ifa->ifa_local;
1182         } endfor_ifa(in_dev);
1183
1184         if (addr)
1185                 goto out_unlock;
1186 no_in_dev:
1187
1188         /* Not loopback addresses on loopback should be preferred
1189            in this case. It is importnat that lo is the first interface
1190            in dev_base list.
1191          */
1192         for_each_netdev_rcu(net, dev) {
1193                 in_dev = __in_dev_get_rcu(dev);
1194                 if (!in_dev)
1195                         continue;
1196
1197                 for_primary_ifa(in_dev) {
1198                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1199                             ifa->ifa_scope <= scope) {
1200                                 addr = ifa->ifa_local;
1201                                 goto out_unlock;
1202                         }
1203                 } endfor_ifa(in_dev);
1204         }
1205 out_unlock:
1206         rcu_read_unlock();
1207         return addr;
1208 }
1209 EXPORT_SYMBOL(inet_select_addr);
1210
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of @in_dev tracking two things:
 *   addr - the first address with scope <= @scope that equals @local
 *          (any address when @local is 0);
 *   same - whether an address was seen whose subnet contains @local
 *          (always true when @local is 0) and @dst (always true when
 *          @dst is 0).
 *
 * Returns addr when such a subnet match was found, 0 otherwise.
 */
1211 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1212                               __be32 local, int scope)
1213 {
1214         int same = 0;
1215         __be32 addr = 0;
1216
1217         for_ifa(in_dev) {
                /* Pick the candidate address once; stop if the subnet
                 * condition has already been satisfied.
                 */
1218                 if (!addr &&
1219                     (local == ifa->ifa_local || !local) &&
1220                     ifa->ifa_scope <= scope) {
1221                         addr = ifa->ifa_local;
1222                         if (same)
1223                                 break;
1224                 }
1225                 if (!same) {
1226                         same = (!local || inet_ifa_match(local, ifa)) &&
1227                                 (!dst || inet_ifa_match(dst, ifa));
1228                         if (same && addr) {
1229                                 if (local || !dst)
1230                                         break;
1231                                 /* Is the selected addr into dst subnet? */
1232                                 if (inet_ifa_match(addr, ifa))
1233                                         break;
1234                                 /* No, then can we use new local src? */
1235                                 if (ifa->ifa_scope <= scope) {
1236                                         addr = ifa->ifa_local;
1237                                         break;
1238                                 }
1239                                 /* search for large dst subnet for addr */
1240                                 same = 0;
1241                         }
1242                 }
1243         } endfor_ifa(in_dev);
1244
1245         return same ? addr : 0;
1246 }
1247
1248 /*
1249  * Confirm that local IP address exists using wildcards:
1250  * - net: netns to check, cannot be NULL
1251  * - in_dev: only on this interface, NULL=any interface
1252  * - dst: only in the same subnet as dst, 0=any dst
1253  * - local: address, 0=autoselect the local address
1254  * - scope: maximum allowed scope value for the local address
1255  */
1256 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1257                          __be32 dst, __be32 local, int scope)
1258 {
1259         __be32 addr = 0;
1260         struct net_device *dev;
1261
1262         if (in_dev != NULL)
1263                 return confirm_addr_indev(in_dev, dst, local, scope);
1264
1265         rcu_read_lock();
1266         for_each_netdev_rcu(net, dev) {
1267                 in_dev = __in_dev_get_rcu(dev);
1268                 if (in_dev) {
1269                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1270                         if (addr)
1271                                 break;
1272                 }
1273         }
1274         rcu_read_unlock();
1275
1276         return addr;
1277 }
1278 EXPORT_SYMBOL(inet_confirm_addr);
1279
1280 /*
1281  *      Device notifier
1282  */
1283
1284 int register_inetaddr_notifier(struct notifier_block *nb)
1285 {
1286         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1287 }
1288 EXPORT_SYMBOL(register_inetaddr_notifier);
1289
1290 int unregister_inetaddr_notifier(struct notifier_block *nb)
1291 {
1292         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1293 }
1294 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1295
1296 /* Rename ifa_labels for a device name change. Make some effort to preserve
1297  * existing alias numbering and to create unique labels if possible.
1298 */
1299 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1300 {
1301         struct in_ifaddr *ifa;
1302         int named = 0;
1303
1304         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1305                 char old[IFNAMSIZ], *dot;
1306
1307                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1308                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1309                 if (named++ == 0)
1310                         goto skip;
1311                 dot = strchr(old, ':');
1312                 if (dot == NULL) {
1313                         sprintf(old, ":%d", named);
1314                         dot = old;
1315                 }
1316                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1317                         strcat(ifa->ifa_label, dot);
1318                 else
1319                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1320 skip:
1321                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1322         }
1323 }
1324
/*
 * Tell whether @mtu is large enough to run IPv4 on an interface.
 * 68 bytes is the smallest datagram size every IPv4 module must be able
 * to forward (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1329
1330 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1331                                         struct in_device *in_dev)
1332
1333 {
1334         struct in_ifaddr *ifa;
1335
1336         for (ifa = in_dev->ifa_list; ifa;
1337              ifa = ifa->ifa_next) {
1338                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1339                          ifa->ifa_local, dev,
1340                          ifa->ifa_local, NULL,
1341                          dev->dev_addr, NULL);
1342         }
1343 }
1344
1345 /* Called only under RTNL semaphore */
1346
/*
 * Netdevice notifier callback for the IPv4 per-device state.
 *
 * When the device has no in_device yet, one is created on NETDEV_REGISTER
 * (loopback devices additionally get NOXFRM and NOPOLICY set) or on
 * NETDEV_CHANGEMTU once the MTU is acceptable; all other events are
 * ignored in that state.  Otherwise the event is dispatched by the switch
 * below.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case the error is converted with
 * notifier_from_errno().
 */
1347 static int inetdev_event(struct notifier_block *this, unsigned long event,
1348                          void *ptr)
1349 {
1350         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1351         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1352
1353         ASSERT_RTNL();
1354
1355         if (!in_dev) {
1356                 if (event == NETDEV_REGISTER) {
1357                         in_dev = inetdev_init(dev);
1358                         if (IS_ERR(in_dev))
1359                                 return notifier_from_errno(PTR_ERR(in_dev));
1360                         if (dev->flags & IFF_LOOPBACK) {
1361                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1362                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1363                         }
1364                 } else if (event == NETDEV_CHANGEMTU) {
1365                         /* Re-enabling IP */
1366                         if (inetdev_valid_mtu(dev->mtu))
1367                                 in_dev = inetdev_init(dev);
1368                 }
1369                 goto out;
1370         }
1371
1372         switch (event) {
1373         case NETDEV_REGISTER:
                /* Registering a device that already has an in_device is
                 * unexpected; log it and clear the pointer.
                 */
1374                 pr_debug("%s: bug\n", __func__);
1375                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1376                 break;
1377         case NETDEV_UP:
1378                 if (!inetdev_valid_mtu(dev->mtu))
1379                         break;
                /* A loopback device coming up is given INADDR_LOOPBACK/8
                 * with host scope and infinite lifetimes.
                 */
1380                 if (dev->flags & IFF_LOOPBACK) {
1381                         struct in_ifaddr *ifa = inet_alloc_ifa();
1382
1383                         if (ifa) {
1384                                 INIT_HLIST_NODE(&ifa->hash);
1385                                 ifa->ifa_local =
1386                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1387                                 ifa->ifa_prefixlen = 8;
1388                                 ifa->ifa_mask = inet_make_mask(8);
1389                                 in_dev_hold(in_dev);
1390                                 ifa->ifa_dev = in_dev;
1391                                 ifa->ifa_scope = RT_SCOPE_HOST;
1392                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1393                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1394                                                  INFINITY_LIFE_TIME);
1395                                 ipv4_devconf_setall(in_dev);
1396                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1397                                 inet_insert_ifa(ifa);
1398                         }
1399                 }
1400                 ip_mc_up(in_dev);
1401                 /* fall through */
1402         case NETDEV_CHANGEADDR:
                /* Gratuitous ARP on up/address change only when arp_notify
                 * is enabled for the device.
                 */
1403                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1404                         break;
1405                 /* fall through */
1406         case NETDEV_NOTIFY_PEERS:
1407                 /* Send gratuitous ARP to notify of link change */
1408                 inetdev_send_gratuitous_arp(dev, in_dev);
1409                 break;
1410         case NETDEV_DOWN:
1411                 ip_mc_down(in_dev);
1412                 break;
1413         case NETDEV_PRE_TYPE_CHANGE:
1414                 ip_mc_unmap(in_dev);
1415                 break;
1416         case NETDEV_POST_TYPE_CHANGE:
1417                 ip_mc_remap(in_dev);
1418                 break;
1419         case NETDEV_CHANGEMTU:
1420                 if (inetdev_valid_mtu(dev->mtu))
1421                         break;
1422                 /* disable IP when MTU is not enough */
                /* fall through */
1423         case NETDEV_UNREGISTER:
1424                 inetdev_destroy(in_dev);
1425                 break;
1426         case NETDEV_CHANGENAME:
1427                 /* Do not notify about label change, this event is
1428                  * not interesting to applications using netlink.
1429                  */
1430                 inetdev_changename(dev, in_dev);
1431
                /* Re-register the sysctl entries for this in_device after
                 * the name change.
                 */
1432                 devinet_sysctl_unregister(in_dev);
1433                 devinet_sysctl_register(in_dev);
1434                 break;
1435         }
1436 out:
1437         return NOTIFY_DONE;
1438 }
1439
/* Notifier block whose callback is inetdev_event(). */
1440 static struct notifier_block ip_netdev_notifier = {
1441         .notifier_call = inetdev_event,
1442 };
1443
1444 static size_t inet_nlmsg_size(void)
1445 {
1446         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1447                + nla_total_size(4) /* IFA_ADDRESS */
1448                + nla_total_size(4) /* IFA_LOCAL */
1449                + nla_total_size(4) /* IFA_BROADCAST */
1450                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1451                + nla_total_size(4)  /* IFA_FLAGS */
1452                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1453 }
1454
1455 static inline u32 cstamp_delta(unsigned long cstamp)
1456 {
1457         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1458 }
1459
1460 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1461                          unsigned long tstamp, u32 preferred, u32 valid)
1462 {
1463         struct ifa_cacheinfo ci;
1464
1465         ci.cstamp = cstamp_delta(cstamp);
1466         ci.tstamp = cstamp_delta(tstamp);
1467         ci.ifa_prefered = preferred;
1468         ci.ifa_valid = valid;
1469
1470         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1471 }
1472
1473 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1474                             u32 portid, u32 seq, int event, unsigned int flags)
1475 {
1476         struct ifaddrmsg *ifm;
1477         struct nlmsghdr  *nlh;
1478         u32 preferred, valid;
1479
1480         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1481         if (nlh == NULL)
1482                 return -EMSGSIZE;
1483
1484         ifm = nlmsg_data(nlh);
1485         ifm->ifa_family = AF_INET;
1486         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1487         ifm->ifa_flags = ifa->ifa_flags;
1488         ifm->ifa_scope = ifa->ifa_scope;
1489         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1490
1491         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1492                 preferred = ifa->ifa_preferred_lft;
1493                 valid = ifa->ifa_valid_lft;
1494                 if (preferred != INFINITY_LIFE_TIME) {
1495                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1496
1497                         if (preferred > tval)
1498                                 preferred -= tval;
1499                         else
1500                                 preferred = 0;
1501                         if (valid != INFINITY_LIFE_TIME) {
1502                                 if (valid > tval)
1503                                         valid -= tval;
1504                                 else
1505                                         valid = 0;
1506                         }
1507                 }
1508         } else {
1509                 preferred = INFINITY_LIFE_TIME;
1510                 valid = INFINITY_LIFE_TIME;
1511         }
1512         if ((ifa->ifa_address &&
1513              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1514             (ifa->ifa_local &&
1515              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1516             (ifa->ifa_broadcast &&
1517              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1518             (ifa->ifa_label[0] &&
1519              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1520             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1521             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1522                           preferred, valid))
1523                 goto nla_put_failure;
1524
1525         return nlmsg_end(skb, nlh);
1526
1527 nla_put_failure:
1528         nlmsg_cancel(skb, nlh);
1529         return -EMSGSIZE;
1530 }
1531
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (NLM_F_MULTI) for
 * every IPv4 address of every device in the socket's namespace.
 *
 * The walk is resumable.  Its position is kept in cb->args[]:
 *   args[0] - index of the dev_index_head hash bucket,
 *   args[1] - index of the device within that bucket,
 *   args[2] - index of the address within that device.
 * The walk stops as soon as inet_fill_ifaddr() returns a value <= 0, and
 * the position reached is stored for the next invocation.
 *
 * Returns skb->len.
 */
1532 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1533 {
1534         struct net *net = sock_net(skb->sk);
1535         int h, s_h;
1536         int idx, s_idx;
1537         int ip_idx, s_ip_idx;
1538         struct net_device *dev;
1539         struct in_device *in_dev;
1540         struct in_ifaddr *ifa;
1541         struct hlist_head *head;
1542
        /* Pick up where the previous invocation stopped. */
1543         s_h = cb->args[0];
1544         s_idx = idx = cb->args[1];
1545         s_ip_idx = ip_idx = cb->args[2];
1546
1547         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1548                 idx = 0;
1549                 head = &net->dev_index_head[h];
1550                 rcu_read_lock();
                /* Sequence value combining the address and device generation
                 * counters; consumed by nl_dump_check_consistent() below.
                 */
1551                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1552                           net->dev_base_seq;
1553                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1554                         if (idx < s_idx)
1555                                 goto cont;
                        /* The saved address index only applies to the device
                         * the previous invocation stopped at.
                         */
1556                         if (h > s_h || idx > s_idx)
1557                                 s_ip_idx = 0;
1558                         in_dev = __in_dev_get_rcu(dev);
1559                         if (!in_dev)
1560                                 goto cont;
1561
1562                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1563                              ifa = ifa->ifa_next, ip_idx++) {
1564                                 if (ip_idx < s_ip_idx)
1565                                         continue;
1566                                 if (inet_fill_ifaddr(skb, ifa,
1567                                              NETLINK_CB(cb->skb).portid,
1568                                              cb->nlh->nlmsg_seq,
1569                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1570                                         rcu_read_unlock();
1571                                         goto done;
1572                                 }
1573                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1574                         }
1575 cont:
1576                         idx++;
1577                 }
1578                 rcu_read_unlock();
1579         }
1580
1581 done:
        /* Save the position for the next invocation. */
1582         cb->args[0] = h;
1583         cb->args[1] = idx;
1584         cb->args[2] = ip_idx;
1585
1586         return skb->len;
1587 }
1588
1589 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1590                       u32 portid)
1591 {
1592         struct sk_buff *skb;
1593         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1594         int err = -ENOBUFS;
1595         struct net *net;
1596
1597         net = dev_net(ifa->ifa_dev->dev);
1598         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1599         if (skb == NULL)
1600                 goto errout;
1601
1602         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1603         if (err < 0) {
1604                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1605                 WARN_ON(err == -EMSGSIZE);
1606                 kfree_skb(skb);
1607                 goto errout;
1608         }
1609         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1610         return;
1611 errout:
1612         if (err < 0)
1613                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1614 }
1615
1616 static size_t inet_get_link_af_size(const struct net_device *dev)
1617 {
1618         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1619
1620         if (!in_dev)
1621                 return 0;
1622
1623         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1624 }
1625
1626 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1627 {
1628         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1629         struct nlattr *nla;
1630         int i;
1631
1632         if (!in_dev)
1633                 return -ENODATA;
1634
1635         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1636         if (nla == NULL)
1637                 return -EMSGSIZE;
1638
1639         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1640                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1641
1642         return 0;
1643 }
1644
1645 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1646         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1647 };
1648
1649 static int inet_validate_link_af(const struct net_device *dev,
1650                                  const struct nlattr *nla)
1651 {
1652         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1653         int err, rem;
1654
1655         if (dev && !__in_dev_get_rtnl(dev))
1656                 return -EAFNOSUPPORT;
1657
1658         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1659         if (err < 0)
1660                 return err;
1661
1662         if (tb[IFLA_INET_CONF]) {
1663                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1664                         int cfgid = nla_type(a);
1665
1666                         if (nla_len(a) < 4)
1667                                 return -EINVAL;
1668
1669                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1670                                 return -EINVAL;
1671                 }
1672         }
1673
1674         return 0;
1675 }
1676
1677 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1678 {
1679         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1680         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1681         int rem;
1682
1683         if (!in_dev)
1684                 return -EAFNOSUPPORT;
1685
1686         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1687                 BUG();
1688
1689         if (tb[IFLA_INET_CONF]) {
1690                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1691                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1692         }
1693
1694         return 0;
1695 }
1696
1697 static int inet_netconf_msgsize_devconf(int type)
1698 {
1699         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1700                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1701
1702         /* type -1 is used for ALL */
1703         if (type == -1 || type == NETCONFA_FORWARDING)
1704                 size += nla_total_size(4);
1705         if (type == -1 || type == NETCONFA_RP_FILTER)
1706                 size += nla_total_size(4);
1707         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1708                 size += nla_total_size(4);
1709         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1710                 size += nla_total_size(4);
1711
1712         return size;
1713 }
1714
1715 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1716                                      struct ipv4_devconf *devconf, u32 portid,
1717                                      u32 seq, int event, unsigned int flags,
1718                                      int type)
1719 {
1720         struct nlmsghdr  *nlh;
1721         struct netconfmsg *ncm;
1722
1723         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1724                         flags);
1725         if (nlh == NULL)
1726                 return -EMSGSIZE;
1727
1728         ncm = nlmsg_data(nlh);
1729         ncm->ncm_family = AF_INET;
1730
1731         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1732                 goto nla_put_failure;
1733
1734         /* type -1 is used for ALL */
1735         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1736             nla_put_s32(skb, NETCONFA_FORWARDING,
1737                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1738                 goto nla_put_failure;
1739         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1740             nla_put_s32(skb, NETCONFA_RP_FILTER,
1741                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1742                 goto nla_put_failure;
1743         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1744             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1745                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1746                 goto nla_put_failure;
1747         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1748             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1749                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1750                 goto nla_put_failure;
1751
1752         return nlmsg_end(skb, nlh);
1753
1754 nla_put_failure:
1755         nlmsg_cancel(skb, nlh);
1756         return -EMSGSIZE;
1757 }
1758
1759 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1760                                  struct ipv4_devconf *devconf)
1761 {
1762         struct sk_buff *skb;
1763         int err = -ENOBUFS;
1764
1765         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1766         if (skb == NULL)
1767                 goto errout;
1768
1769         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1770                                         RTM_NEWNETCONF, 0, type);
1771         if (err < 0) {
1772                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1773                 WARN_ON(err == -EMSGSIZE);
1774                 kfree_skb(skb);
1775                 goto errout;
1776         }
1777         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1778         return;
1779 errout:
1780         if (err < 0)
1781                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1782 }
1783
1784 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1785         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1786         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1787         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1788         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1789 };
1790
1791 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1792                                     struct nlmsghdr *nlh)
1793 {
1794         struct net *net = sock_net(in_skb->sk);
1795         struct nlattr *tb[NETCONFA_MAX+1];
1796         struct netconfmsg *ncm;
1797         struct sk_buff *skb;
1798         struct ipv4_devconf *devconf;
1799         struct in_device *in_dev;
1800         struct net_device *dev;
1801         int ifindex;
1802         int err;
1803
1804         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1805                           devconf_ipv4_policy);
1806         if (err < 0)
1807                 goto errout;
1808
1809         err = EINVAL;
1810         if (!tb[NETCONFA_IFINDEX])
1811                 goto errout;
1812
1813         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1814         switch (ifindex) {
1815         case NETCONFA_IFINDEX_ALL:
1816                 devconf = net->ipv4.devconf_all;
1817                 break;
1818         case NETCONFA_IFINDEX_DEFAULT:
1819                 devconf = net->ipv4.devconf_dflt;
1820                 break;
1821         default:
1822                 dev = __dev_get_by_index(net, ifindex);
1823                 if (dev == NULL)
1824                         goto errout;
1825                 in_dev = __in_dev_get_rtnl(dev);
1826                 if (in_dev == NULL)
1827                         goto errout;
1828                 devconf = &in_dev->cnf;
1829                 break;
1830         }
1831
1832         err = -ENOBUFS;
1833         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1834         if (skb == NULL)
1835                 goto errout;
1836
1837         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1838                                         NETLINK_CB(in_skb).portid,
1839                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1840                                         -1);
1841         if (err < 0) {
1842                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1843                 WARN_ON(err == -EMSGSIZE);
1844                 kfree_skb(skb);
1845                 goto errout;
1846         }
1847         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1848 errout:
1849         return err;
1850 }
1851
1852 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1853                                      struct netlink_callback *cb)
1854 {
1855         struct net *net = sock_net(skb->sk);
1856         int h, s_h;
1857         int idx, s_idx;
1858         struct net_device *dev;
1859         struct in_device *in_dev;
1860         struct hlist_head *head;
1861
1862         s_h = cb->args[0];
1863         s_idx = idx = cb->args[1];
1864
1865         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1866                 idx = 0;
1867                 head = &net->dev_index_head[h];
1868                 rcu_read_lock();
1869                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1870                           net->dev_base_seq;
1871                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1872                         if (idx < s_idx)
1873                                 goto cont;
1874                         in_dev = __in_dev_get_rcu(dev);
1875                         if (!in_dev)
1876                                 goto cont;
1877
1878                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1879                                                       &in_dev->cnf,
1880                                                       NETLINK_CB(cb->skb).portid,
1881                                                       cb->nlh->nlmsg_seq,
1882                                                       RTM_NEWNETCONF,
1883                                                       NLM_F_MULTI,
1884                                                       -1) <= 0) {
1885                                 rcu_read_unlock();
1886                                 goto done;
1887                         }
1888                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1889 cont:
1890                         idx++;
1891                 }
1892                 rcu_read_unlock();
1893         }
1894         if (h == NETDEV_HASHENTRIES) {
1895                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1896                                               net->ipv4.devconf_all,
1897                                               NETLINK_CB(cb->skb).portid,
1898                                               cb->nlh->nlmsg_seq,
1899                                               RTM_NEWNETCONF, NLM_F_MULTI,
1900                                               -1) <= 0)
1901                         goto done;
1902                 else
1903                         h++;
1904         }
1905         if (h == NETDEV_HASHENTRIES + 1) {
1906                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1907                                               net->ipv4.devconf_dflt,
1908                                               NETLINK_CB(cb->skb).portid,
1909                                               cb->nlh->nlmsg_seq,
1910                                               RTM_NEWNETCONF, NLM_F_MULTI,
1911                                               -1) <= 0)
1912                         goto done;
1913                 else
1914                         h++;
1915         }
1916 done:
1917         cb->args[0] = h;
1918         cb->args[1] = idx;
1919
1920         return skb->len;
1921 }
1922
1923 #ifdef CONFIG_SYSCTL
1924
1925 static void devinet_copy_dflt_conf(struct net *net, int i)
1926 {
1927         struct net_device *dev;
1928
1929         rcu_read_lock();
1930         for_each_netdev_rcu(net, dev) {
1931                 struct in_device *in_dev;
1932
1933                 in_dev = __in_dev_get_rcu(dev);
1934                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1935                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1936         }
1937         rcu_read_unlock();
1938 }
1939
1940 /* called with RTNL locked */
1941 static void inet_forward_change(struct net *net)
1942 {
1943         struct net_device *dev;
1944         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1945
1946         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1947         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1948         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1949                                     NETCONFA_IFINDEX_ALL,
1950                                     net->ipv4.devconf_all);
1951         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1952                                     NETCONFA_IFINDEX_DEFAULT,
1953                                     net->ipv4.devconf_dflt);
1954
1955         for_each_netdev(net, dev) {
1956                 struct in_device *in_dev;
1957                 if (on)
1958                         dev_disable_lro(dev);
1959                 rcu_read_lock();
1960                 in_dev = __in_dev_get_rcu(dev);
1961                 if (in_dev) {
1962                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1963                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1964                                                     dev->ifindex, &in_dev->cnf);
1965                 }
1966                 rcu_read_unlock();
1967         }
1968 }
1969
1970 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1971 {
1972         if (cnf == net->ipv4.devconf_dflt)
1973                 return NETCONFA_IFINDEX_DEFAULT;
1974         else if (cnf == net->ipv4.devconf_all)
1975                 return NETCONFA_IFINDEX_ALL;
1976         else {
1977                 struct in_device *idev
1978                         = container_of(cnf, struct in_device, cnf);
1979                 return idev->dev->ifindex;
1980         }
1981 }
1982
1983 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1984                              void __user *buffer,
1985                              size_t *lenp, loff_t *ppos)
1986 {
1987         int old_value = *(int *)ctl->data;
1988         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1989         int new_value = *(int *)ctl->data;
1990
1991         if (write) {
1992                 struct ipv4_devconf *cnf = ctl->extra1;
1993                 struct net *net = ctl->extra2;
1994                 int i = (int *)ctl->data - cnf->data;
1995                 int ifindex;
1996
1997                 set_bit(i, cnf->state);
1998
1999                 if (cnf == net->ipv4.devconf_dflt)
2000                         devinet_copy_dflt_conf(net, i);
2001                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2002                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2003                         if ((new_value == 0) && (old_value != 0))
2004                                 rt_cache_flush(net);
2005
2006                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2007                     new_value != old_value) {
2008                         ifindex = devinet_conf_ifindex(net, cnf);
2009                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2010                                                     ifindex, cnf);
2011                 }
2012                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2013                     new_value != old_value) {
2014                         ifindex = devinet_conf_ifindex(net, cnf);
2015                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2016                                                     ifindex, cnf);
2017                 }
2018         }
2019
2020         return ret;
2021 }
2022
2023 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2024                                   void __user *buffer,
2025                                   size_t *lenp, loff_t *ppos)
2026 {
2027         int *valp = ctl->data;
2028         int val = *valp;
2029         loff_t pos = *ppos;
2030         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2031
2032         if (write && *valp != val) {
2033                 struct net *net = ctl->extra2;
2034
2035                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2036                         if (!rtnl_trylock()) {
2037                                 /* Restore the original values before restarting */
2038                                 *valp = val;
2039                                 *ppos = pos;
2040                                 return restart_syscall();
2041                         }
2042                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2043                                 inet_forward_change(net);
2044                         } else {
2045                                 struct ipv4_devconf *cnf = ctl->extra1;
2046                                 struct in_device *idev =
2047                                         container_of(cnf, struct in_device, cnf);
2048                                 if (*valp)
2049                                         dev_disable_lro(idev->dev);
2050                                 inet_netconf_notify_devconf(net,
2051                                                             NETCONFA_FORWARDING,
2052                                                             idev->dev->ifindex,
2053                                                             cnf);
2054                         }
2055                         rtnl_unlock();
2056                         rt_cache_flush(net);
2057                 } else
2058                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2059                                                     NETCONFA_IFINDEX_DEFAULT,
2060                                                     net->ipv4.devconf_dflt);
2061         }
2062
2063         return ret;
2064 }
2065
2066 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2067                                 void __user *buffer,
2068                                 size_t *lenp, loff_t *ppos)
2069 {
2070         int *valp = ctl->data;
2071         int val = *valp;
2072         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2073         struct net *net = ctl->extra2;
2074
2075         if (write && *valp != val)
2076                 rt_cache_flush(net);
2077
2078         return ret;
2079 }
2080
/*
 * Builders for the entries of the devinet sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY() is the base form: .data points at the slot for
 * IPV4_DEVCONF_<attr> inside the global ipv4_devconf block (slots are
 * indexed by attr - 1) and .extra1 at the block itself.  The remaining
 * macros only choose the file mode and the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Read-write entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2103
2104 static struct devinet_sysctl_table {
2105         struct ctl_table_header *sysctl_header;
2106         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2107 } devinet_sysctl = {
2108         .devinet_vars = {
2109                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2110                                              devinet_sysctl_forward),
2111                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2112
2113                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2114                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2115                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2116                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2117                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2118                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2119                                         "accept_source_route"),
2120                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2121                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2122                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2123                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2124                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2125                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2126                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2127                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2128                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2129                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2130                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2131                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2132                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2133                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2134                                         "force_igmp_version"),
2135                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2136                                         "igmpv2_unsolicited_report_interval"),
2137                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2138                                         "igmpv3_unsolicited_report_interval"),
2139
2140                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2141                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2142                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2143                                               "promote_secondaries"),
2144                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2145                                               "route_localnet"),
2146         },
2147 };
2148
2149 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2150                                         struct ipv4_devconf *p)
2151 {
2152         int i;
2153         struct devinet_sysctl_table *t;
2154         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2155
2156         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2157         if (!t)
2158                 goto out;
2159
2160         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2161                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2162                 t->devinet_vars[i].extra1 = p;
2163                 t->devinet_vars[i].extra2 = net;
2164         }
2165
2166         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2167
2168         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2169         if (!t->sysctl_header)
2170                 goto free;
2171
2172         p->sysctl = t;
2173         return 0;
2174
2175 free:
2176         kfree(t);
2177 out:
2178         return -ENOBUFS;
2179 }
2180
2181 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2182 {
2183         struct devinet_sysctl_table *t = cnf->sysctl;
2184
2185         if (t == NULL)
2186                 return;
2187
2188         cnf->sysctl = NULL;
2189         unregister_net_sysctl_table(t->sysctl_header);
2190         kfree(t);
2191 }
2192
2193 static int devinet_sysctl_register(struct in_device *idev)
2194 {
2195         int err;
2196
2197         if (!sysctl_dev_name_is_allowed(idev->dev->name))
2198                 return -EINVAL;
2199
2200         err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2201         if (err)
2202                 return err;
2203         err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2204                                         &idev->cnf);
2205         if (err)
2206                 neigh_sysctl_unregister(idev->arp_parms);
2207         return err;
2208 }
2209
2210 static void devinet_sysctl_unregister(struct in_device *idev)
2211 {
2212         __devinet_sysctl_unregister(&idev->cnf);
2213         neigh_sysctl_unregister(idev->arp_parms);
2214 }
2215
2216 static struct ctl_table ctl_forward_entry[] = {
2217         {
2218                 .procname       = "ip_forward",
2219                 .data           = &ipv4_devconf.data[
2220                                         IPV4_DEVCONF_FORWARDING - 1],
2221                 .maxlen         = sizeof(int),
2222                 .mode           = 0644,
2223                 .proc_handler   = devinet_sysctl_forward,
2224                 .extra1         = &ipv4_devconf,
2225                 .extra2         = &init_net,
2226         },
2227         { },
2228 };
2229 #endif
2230
2231 static __net_init int devinet_init_net(struct net *net)
2232 {
2233         int err;
2234         struct ipv4_devconf *all, *dflt;
2235 #ifdef CONFIG_SYSCTL
2236         struct ctl_table *tbl = ctl_forward_entry;
2237         struct ctl_table_header *forw_hdr;
2238 #endif
2239
2240         err = -ENOMEM;
2241         all = &ipv4_devconf;
2242         dflt = &ipv4_devconf_dflt;
2243
2244         if (!net_eq(net, &init_net)) {
2245                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2246                 if (all == NULL)
2247                         goto err_alloc_all;
2248
2249                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2250                 if (dflt == NULL)
2251                         goto err_alloc_dflt;
2252
2253 #ifdef CONFIG_SYSCTL
2254                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2255                 if (tbl == NULL)
2256                         goto err_alloc_ctl;
2257
2258                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2259                 tbl[0].extra1 = all;
2260                 tbl[0].extra2 = net;
2261 #endif
2262         }
2263
2264 #ifdef CONFIG_SYSCTL
2265         err = __devinet_sysctl_register(net, "all", all);
2266         if (err < 0)
2267                 goto err_reg_all;
2268
2269         err = __devinet_sysctl_register(net, "default", dflt);
2270         if (err < 0)
2271                 goto err_reg_dflt;
2272
2273         err = -ENOMEM;
2274         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2275         if (forw_hdr == NULL)
2276                 goto err_reg_ctl;
2277         net->ipv4.forw_hdr = forw_hdr;
2278 #endif
2279
2280         net->ipv4.devconf_all = all;
2281         net->ipv4.devconf_dflt = dflt;
2282         return 0;
2283
2284 #ifdef CONFIG_SYSCTL
2285 err_reg_ctl:
2286         __devinet_sysctl_unregister(dflt);
2287 err_reg_dflt:
2288         __devinet_sysctl_unregister(all);
2289 err_reg_all:
2290         if (tbl != ctl_forward_entry)
2291                 kfree(tbl);
2292 err_alloc_ctl:
2293 #endif
2294         if (dflt != &ipv4_devconf_dflt)
2295                 kfree(dflt);
2296 err_alloc_dflt:
2297         if (all != &ipv4_devconf)
2298                 kfree(all);
2299 err_alloc_all:
2300         return err;
2301 }
2302
2303 static __net_exit void devinet_exit_net(struct net *net)
2304 {
2305 #ifdef CONFIG_SYSCTL
2306         struct ctl_table *tbl;
2307
2308         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2309         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2310         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2311         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2312         kfree(tbl);
2313 #endif
2314         kfree(net->ipv4.devconf_dflt);
2315         kfree(net->ipv4.devconf_all);
2316 }
2317
2318 static __net_initdata struct pernet_operations devinet_ops = {
2319         .init = devinet_init_net,
2320         .exit = devinet_exit_net,
2321 };
2322
2323 static struct rtnl_af_ops inet_af_ops = {
2324         .family           = AF_INET,
2325         .fill_link_af     = inet_fill_link_af,
2326         .get_link_af_size = inet_get_link_af_size,
2327         .validate_link_af = inet_validate_link_af,
2328         .set_link_af      = inet_set_link_af,
2329 };
2330
2331 void __init devinet_init(void)
2332 {
2333         int i;
2334
2335         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2336                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2337
2338         register_pernet_subsys(&devinet_ops);
2339
2340         register_gifconf(PF_INET, inet_gifconf);
2341         register_netdevice_notifier(&ip_netdev_notifier);
2342
2343         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2344
2345         rtnl_af_register(&inet_af_ops);
2346
2347         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2348         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2349         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2350         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2351                       inet_netconf_dump_devconf, NULL);
2352 }
2353