Blackfin: bitops: fix include order after little endian inclusion
[cascardo/linux.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 static struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
71                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
80                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
82                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/* Read attribute @attr from the default devconf of namespace @net.
 * @net is parenthesized so that any pointer-valued expression (not just a
 * plain identifier) expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90         [IFA_LOCAL]             = { .type = NLA_U32 },
91         [IFA_ADDRESS]           = { .type = NLA_U32 },
92         [IFA_BROADCAST]         = { .type = NLA_U32 },
93         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 };
95
96 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
97  * value.  So if you change this define, make appropriate changes to
98  * inet_addr_hash as well.
99  */
100 #define IN4_ADDR_HSIZE  256
101 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
105 {
106         u32 val = (__force u32) addr ^ hash_ptr(net, 8);
107
108         return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
109                 (IN4_ADDR_HSIZE - 1));
110 }
111
112 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
113 {
114         unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
115
116         spin_lock(&inet_addr_hash_lock);
117         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
118         spin_unlock(&inet_addr_hash_lock);
119 }
120
121 static void inet_hash_remove(struct in_ifaddr *ifa)
122 {
123         spin_lock(&inet_addr_hash_lock);
124         hlist_del_init_rcu(&ifa->hash);
125         spin_unlock(&inet_addr_hash_lock);
126 }
127
128 /**
129  * __ip_dev_find - find the first device with a given source address.
130  * @net: the net namespace
131  * @addr: the source address
132  * @devref: if true, take a reference on the found device
133  *
134  * If a caller uses devref=false, it should be protected by RCU, or RTNL
135  */
136 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
137 {
138         unsigned int hash = inet_addr_hash(net, addr);
139         struct net_device *result = NULL;
140         struct in_ifaddr *ifa;
141         struct hlist_node *node;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
145                 struct net_device *dev = ifa->ifa_dev->dev;
146
147                 if (!net_eq(dev_net(dev), net))
148                         continue;
149                 if (ifa->ifa_local == addr) {
150                         result = dev;
151                         break;
152                 }
153         }
154         if (result && devref)
155                 dev_hold(result);
156         rcu_read_unlock();
157         return result;
158 }
159 EXPORT_SYMBOL(__ip_dev_find);
160
161 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
162
163 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
164 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
165                          int destroy);
166 #ifdef CONFIG_SYSCTL
167 static void devinet_sysctl_register(struct in_device *idev);
168 static void devinet_sysctl_unregister(struct in_device *idev);
169 #else
170 static inline void devinet_sysctl_register(struct in_device *idev)
171 {
172 }
173 static inline void devinet_sysctl_unregister(struct in_device *idev)
174 {
175 }
176 #endif
177
178 /* Locks all the inet devices. */
179
180 static struct in_ifaddr *inet_alloc_ifa(void)
181 {
182         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
183 }
184
185 static void inet_rcu_free_ifa(struct rcu_head *head)
186 {
187         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
188         if (ifa->ifa_dev)
189                 in_dev_put(ifa->ifa_dev);
190         kfree(ifa);
191 }
192
193 static inline void inet_free_ifa(struct in_ifaddr *ifa)
194 {
195         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
196 }
197
198 void in_dev_finish_destroy(struct in_device *idev)
199 {
200         struct net_device *dev = idev->dev;
201
202         WARN_ON(idev->ifa_list);
203         WARN_ON(idev->mc_list);
204 #ifdef NET_REFCNT_DEBUG
205         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
206                idev, dev ? dev->name : "NIL");
207 #endif
208         dev_put(dev);
209         if (!idev->dead)
210                 pr_err("Freeing alive in_device %p\n", idev);
211         else
212                 kfree(idev);
213 }
214 EXPORT_SYMBOL(in_dev_finish_destroy);
215
216 static struct in_device *inetdev_init(struct net_device *dev)
217 {
218         struct in_device *in_dev;
219
220         ASSERT_RTNL();
221
222         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
223         if (!in_dev)
224                 goto out;
225         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
226                         sizeof(in_dev->cnf));
227         in_dev->cnf.sysctl = NULL;
228         in_dev->dev = dev;
229         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
230         if (!in_dev->arp_parms)
231                 goto out_kfree;
232         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
233                 dev_disable_lro(dev);
234         /* Reference in_dev->dev */
235         dev_hold(dev);
236         /* Account for reference dev->ip_ptr (below) */
237         in_dev_hold(in_dev);
238
239         devinet_sysctl_register(in_dev);
240         ip_mc_init_dev(in_dev);
241         if (dev->flags & IFF_UP)
242                 ip_mc_up(in_dev);
243
244         /* we can receive as soon as ip_ptr is set -- do this last */
245         rcu_assign_pointer(dev->ip_ptr, in_dev);
246 out:
247         return in_dev;
248 out_kfree:
249         kfree(in_dev);
250         in_dev = NULL;
251         goto out;
252 }
253
254 static void in_dev_rcu_put(struct rcu_head *head)
255 {
256         struct in_device *idev = container_of(head, struct in_device, rcu_head);
257         in_dev_put(idev);
258 }
259
260 static void inetdev_destroy(struct in_device *in_dev)
261 {
262         struct in_ifaddr *ifa;
263         struct net_device *dev;
264
265         ASSERT_RTNL();
266
267         dev = in_dev->dev;
268
269         in_dev->dead = 1;
270
271         ip_mc_destroy_dev(in_dev);
272
273         while ((ifa = in_dev->ifa_list) != NULL) {
274                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
275                 inet_free_ifa(ifa);
276         }
277
278         rcu_assign_pointer(dev->ip_ptr, NULL);
279
280         devinet_sysctl_unregister(in_dev);
281         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
282         arp_ifdown(dev);
283
284         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
285 }
286
287 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
288 {
289         rcu_read_lock();
290         for_primary_ifa(in_dev) {
291                 if (inet_ifa_match(a, ifa)) {
292                         if (!b || inet_ifa_match(b, ifa)) {
293                                 rcu_read_unlock();
294                                 return 1;
295                         }
296                 }
297         } endfor_ifa(in_dev);
298         rcu_read_unlock();
299         return 0;
300 }
301
302 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
303                          int destroy, struct nlmsghdr *nlh, u32 pid)
304 {
305         struct in_ifaddr *promote = NULL;
306         struct in_ifaddr *ifa, *ifa1 = *ifap;
307         struct in_ifaddr *last_prim = in_dev->ifa_list;
308         struct in_ifaddr *prev_prom = NULL;
309         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
310
311         ASSERT_RTNL();
312
313         /* 1. Deleting primary ifaddr forces deletion all secondaries
314          * unless alias promotion is set
315          **/
316
317         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
318                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
319
320                 while ((ifa = *ifap1) != NULL) {
321                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
322                             ifa1->ifa_scope <= ifa->ifa_scope)
323                                 last_prim = ifa;
324
325                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
326                             ifa1->ifa_mask != ifa->ifa_mask ||
327                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
328                                 ifap1 = &ifa->ifa_next;
329                                 prev_prom = ifa;
330                                 continue;
331                         }
332
333                         if (!do_promote) {
334                                 inet_hash_remove(ifa);
335                                 *ifap1 = ifa->ifa_next;
336
337                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
338                                 blocking_notifier_call_chain(&inetaddr_chain,
339                                                 NETDEV_DOWN, ifa);
340                                 inet_free_ifa(ifa);
341                         } else {
342                                 promote = ifa;
343                                 break;
344                         }
345                 }
346         }
347
348         /* 2. Unlink it */
349
350         *ifap = ifa1->ifa_next;
351         inet_hash_remove(ifa1);
352
353         /* 3. Announce address deletion */
354
355         /* Send message first, then call notifier.
356            At first sight, FIB update triggered by notifier
357            will refer to already deleted ifaddr, that could confuse
358            netlink listeners. It is not true: look, gated sees
359            that route deleted and if it still thinks that ifaddr
360            is valid, it will try to restore deleted routes... Grr.
361            So that, this order is correct.
362          */
363         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
364         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
365
366         if (promote) {
367
368                 if (prev_prom) {
369                         prev_prom->ifa_next = promote->ifa_next;
370                         promote->ifa_next = last_prim->ifa_next;
371                         last_prim->ifa_next = promote;
372                 }
373
374                 promote->ifa_flags &= ~IFA_F_SECONDARY;
375                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
376                 blocking_notifier_call_chain(&inetaddr_chain,
377                                 NETDEV_UP, promote);
378                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
379                         if (ifa1->ifa_mask != ifa->ifa_mask ||
380                             !inet_ifa_match(ifa1->ifa_address, ifa))
381                                         continue;
382                         fib_add_ifaddr(ifa);
383                 }
384
385         }
386         if (destroy)
387                 inet_free_ifa(ifa1);
388 }
389
390 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
391                          int destroy)
392 {
393         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
394 }
395
396 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
397                              u32 pid)
398 {
399         struct in_device *in_dev = ifa->ifa_dev;
400         struct in_ifaddr *ifa1, **ifap, **last_primary;
401
402         ASSERT_RTNL();
403
404         if (!ifa->ifa_local) {
405                 inet_free_ifa(ifa);
406                 return 0;
407         }
408
409         ifa->ifa_flags &= ~IFA_F_SECONDARY;
410         last_primary = &in_dev->ifa_list;
411
412         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
413              ifap = &ifa1->ifa_next) {
414                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
415                     ifa->ifa_scope <= ifa1->ifa_scope)
416                         last_primary = &ifa1->ifa_next;
417                 if (ifa1->ifa_mask == ifa->ifa_mask &&
418                     inet_ifa_match(ifa1->ifa_address, ifa)) {
419                         if (ifa1->ifa_local == ifa->ifa_local) {
420                                 inet_free_ifa(ifa);
421                                 return -EEXIST;
422                         }
423                         if (ifa1->ifa_scope != ifa->ifa_scope) {
424                                 inet_free_ifa(ifa);
425                                 return -EINVAL;
426                         }
427                         ifa->ifa_flags |= IFA_F_SECONDARY;
428                 }
429         }
430
431         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
432                 net_srandom(ifa->ifa_local);
433                 ifap = last_primary;
434         }
435
436         ifa->ifa_next = *ifap;
437         *ifap = ifa;
438
439         inet_hash_insert(dev_net(in_dev->dev), ifa);
440
441         /* Send message first, then call notifier.
442            Notifier will trigger FIB update, so that
443            listeners of netlink will know about new ifaddr */
444         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
445         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
446
447         return 0;
448 }
449
450 static int inet_insert_ifa(struct in_ifaddr *ifa)
451 {
452         return __inet_insert_ifa(ifa, NULL, 0);
453 }
454
455 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
456 {
457         struct in_device *in_dev = __in_dev_get_rtnl(dev);
458
459         ASSERT_RTNL();
460
461         if (!in_dev) {
462                 inet_free_ifa(ifa);
463                 return -ENOBUFS;
464         }
465         ipv4_devconf_setall(in_dev);
466         if (ifa->ifa_dev != in_dev) {
467                 WARN_ON(ifa->ifa_dev);
468                 in_dev_hold(in_dev);
469                 ifa->ifa_dev = in_dev;
470         }
471         if (ipv4_is_loopback(ifa->ifa_local))
472                 ifa->ifa_scope = RT_SCOPE_HOST;
473         return inet_insert_ifa(ifa);
474 }
475
476 /* Caller must hold RCU or RTNL :
477  * We dont take a reference on found in_device
478  */
479 struct in_device *inetdev_by_index(struct net *net, int ifindex)
480 {
481         struct net_device *dev;
482         struct in_device *in_dev = NULL;
483
484         rcu_read_lock();
485         dev = dev_get_by_index_rcu(net, ifindex);
486         if (dev)
487                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
488         rcu_read_unlock();
489         return in_dev;
490 }
491 EXPORT_SYMBOL(inetdev_by_index);
492
493 /* Called only from RTNL semaphored context. No locks. */
494
495 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
496                                     __be32 mask)
497 {
498         ASSERT_RTNL();
499
500         for_primary_ifa(in_dev) {
501                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
502                         return ifa;
503         } endfor_ifa(in_dev);
504         return NULL;
505 }
506
507 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
508 {
509         struct net *net = sock_net(skb->sk);
510         struct nlattr *tb[IFA_MAX+1];
511         struct in_device *in_dev;
512         struct ifaddrmsg *ifm;
513         struct in_ifaddr *ifa, **ifap;
514         int err = -EINVAL;
515
516         ASSERT_RTNL();
517
518         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
519         if (err < 0)
520                 goto errout;
521
522         ifm = nlmsg_data(nlh);
523         in_dev = inetdev_by_index(net, ifm->ifa_index);
524         if (in_dev == NULL) {
525                 err = -ENODEV;
526                 goto errout;
527         }
528
529         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
530              ifap = &ifa->ifa_next) {
531                 if (tb[IFA_LOCAL] &&
532                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
533                         continue;
534
535                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
536                         continue;
537
538                 if (tb[IFA_ADDRESS] &&
539                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
540                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
541                         continue;
542
543                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
544                 return 0;
545         }
546
547         err = -EADDRNOTAVAIL;
548 errout:
549         return err;
550 }
551
552 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
553 {
554         struct nlattr *tb[IFA_MAX+1];
555         struct in_ifaddr *ifa;
556         struct ifaddrmsg *ifm;
557         struct net_device *dev;
558         struct in_device *in_dev;
559         int err;
560
561         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
562         if (err < 0)
563                 goto errout;
564
565         ifm = nlmsg_data(nlh);
566         err = -EINVAL;
567         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
568                 goto errout;
569
570         dev = __dev_get_by_index(net, ifm->ifa_index);
571         err = -ENODEV;
572         if (dev == NULL)
573                 goto errout;
574
575         in_dev = __in_dev_get_rtnl(dev);
576         err = -ENOBUFS;
577         if (in_dev == NULL)
578                 goto errout;
579
580         ifa = inet_alloc_ifa();
581         if (ifa == NULL)
582                 /*
583                  * A potential indev allocation can be left alive, it stays
584                  * assigned to its device and is destroy with it.
585                  */
586                 goto errout;
587
588         ipv4_devconf_setall(in_dev);
589         in_dev_hold(in_dev);
590
591         if (tb[IFA_ADDRESS] == NULL)
592                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
593
594         INIT_HLIST_NODE(&ifa->hash);
595         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
596         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
597         ifa->ifa_flags = ifm->ifa_flags;
598         ifa->ifa_scope = ifm->ifa_scope;
599         ifa->ifa_dev = in_dev;
600
601         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
602         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
603
604         if (tb[IFA_BROADCAST])
605                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
606
607         if (tb[IFA_LABEL])
608                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
609         else
610                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
611
612         return ifa;
613
614 errout:
615         return ERR_PTR(err);
616 }
617
618 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
619 {
620         struct net *net = sock_net(skb->sk);
621         struct in_ifaddr *ifa;
622
623         ASSERT_RTNL();
624
625         ifa = rtm_to_ifaddr(net, nlh);
626         if (IS_ERR(ifa))
627                 return PTR_ERR(ifa);
628
629         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
630 }
631
632 /*
633  *      Determine a default network mask, based on the IP address.
634  */
635
636 static inline int inet_abc_len(__be32 addr)
637 {
638         int rc = -1;    /* Something else, probably a multicast. */
639
640         if (ipv4_is_zeronet(addr))
641                 rc = 0;
642         else {
643                 __u32 haddr = ntohl(addr);
644
645                 if (IN_CLASSA(haddr))
646                         rc = 8;
647                 else if (IN_CLASSB(haddr))
648                         rc = 16;
649                 else if (IN_CLASSC(haddr))
650                         rc = 24;
651         }
652
653         return rc;
654 }
655
656
657 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
658 {
659         struct ifreq ifr;
660         struct sockaddr_in sin_orig;
661         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
662         struct in_device *in_dev;
663         struct in_ifaddr **ifap = NULL;
664         struct in_ifaddr *ifa = NULL;
665         struct net_device *dev;
666         char *colon;
667         int ret = -EFAULT;
668         int tryaddrmatch = 0;
669
670         /*
671          *      Fetch the caller's info block into kernel space
672          */
673
674         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
675                 goto out;
676         ifr.ifr_name[IFNAMSIZ - 1] = 0;
677
678         /* save original address for comparison */
679         memcpy(&sin_orig, sin, sizeof(*sin));
680
681         colon = strchr(ifr.ifr_name, ':');
682         if (colon)
683                 *colon = 0;
684
685         dev_load(net, ifr.ifr_name);
686
687         switch (cmd) {
688         case SIOCGIFADDR:       /* Get interface address */
689         case SIOCGIFBRDADDR:    /* Get the broadcast address */
690         case SIOCGIFDSTADDR:    /* Get the destination address */
691         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
692                 /* Note that these ioctls will not sleep,
693                    so that we do not impose a lock.
694                    One day we will be forced to put shlock here (I mean SMP)
695                  */
696                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
697                 memset(sin, 0, sizeof(*sin));
698                 sin->sin_family = AF_INET;
699                 break;
700
701         case SIOCSIFFLAGS:
702                 ret = -EACCES;
703                 if (!capable(CAP_NET_ADMIN))
704                         goto out;
705                 break;
706         case SIOCSIFADDR:       /* Set interface address (and family) */
707         case SIOCSIFBRDADDR:    /* Set the broadcast address */
708         case SIOCSIFDSTADDR:    /* Set the destination address */
709         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
710                 ret = -EACCES;
711                 if (!capable(CAP_NET_ADMIN))
712                         goto out;
713                 ret = -EINVAL;
714                 if (sin->sin_family != AF_INET)
715                         goto out;
716                 break;
717         default:
718                 ret = -EINVAL;
719                 goto out;
720         }
721
722         rtnl_lock();
723
724         ret = -ENODEV;
725         dev = __dev_get_by_name(net, ifr.ifr_name);
726         if (!dev)
727                 goto done;
728
729         if (colon)
730                 *colon = ':';
731
732         in_dev = __in_dev_get_rtnl(dev);
733         if (in_dev) {
734                 if (tryaddrmatch) {
735                         /* Matthias Andree */
736                         /* compare label and address (4.4BSD style) */
737                         /* note: we only do this for a limited set of ioctls
738                            and only if the original address family was AF_INET.
739                            This is checked above. */
740                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
741                              ifap = &ifa->ifa_next) {
742                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
743                                     sin_orig.sin_addr.s_addr ==
744                                                         ifa->ifa_local) {
745                                         break; /* found */
746                                 }
747                         }
748                 }
749                 /* we didn't get a match, maybe the application is
750                    4.3BSD-style and passed in junk so we fall back to
751                    comparing just the label */
752                 if (!ifa) {
753                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
754                              ifap = &ifa->ifa_next)
755                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
756                                         break;
757                 }
758         }
759
760         ret = -EADDRNOTAVAIL;
761         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
762                 goto done;
763
764         switch (cmd) {
765         case SIOCGIFADDR:       /* Get interface address */
766                 sin->sin_addr.s_addr = ifa->ifa_local;
767                 goto rarok;
768
769         case SIOCGIFBRDADDR:    /* Get the broadcast address */
770                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
771                 goto rarok;
772
773         case SIOCGIFDSTADDR:    /* Get the destination address */
774                 sin->sin_addr.s_addr = ifa->ifa_address;
775                 goto rarok;
776
777         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
778                 sin->sin_addr.s_addr = ifa->ifa_mask;
779                 goto rarok;
780
781         case SIOCSIFFLAGS:
782                 if (colon) {
783                         ret = -EADDRNOTAVAIL;
784                         if (!ifa)
785                                 break;
786                         ret = 0;
787                         if (!(ifr.ifr_flags & IFF_UP))
788                                 inet_del_ifa(in_dev, ifap, 1);
789                         break;
790                 }
791                 ret = dev_change_flags(dev, ifr.ifr_flags);
792                 break;
793
794         case SIOCSIFADDR:       /* Set interface address (and family) */
795                 ret = -EINVAL;
796                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
797                         break;
798
799                 if (!ifa) {
800                         ret = -ENOBUFS;
801                         ifa = inet_alloc_ifa();
802                         INIT_HLIST_NODE(&ifa->hash);
803                         if (!ifa)
804                                 break;
805                         if (colon)
806                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
807                         else
808                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
809                 } else {
810                         ret = 0;
811                         if (ifa->ifa_local == sin->sin_addr.s_addr)
812                                 break;
813                         inet_del_ifa(in_dev, ifap, 0);
814                         ifa->ifa_broadcast = 0;
815                         ifa->ifa_scope = 0;
816                 }
817
818                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
819
820                 if (!(dev->flags & IFF_POINTOPOINT)) {
821                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
822                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
823                         if ((dev->flags & IFF_BROADCAST) &&
824                             ifa->ifa_prefixlen < 31)
825                                 ifa->ifa_broadcast = ifa->ifa_address |
826                                                      ~ifa->ifa_mask;
827                 } else {
828                         ifa->ifa_prefixlen = 32;
829                         ifa->ifa_mask = inet_make_mask(32);
830                 }
831                 ret = inet_set_ifa(dev, ifa);
832                 break;
833
834         case SIOCSIFBRDADDR:    /* Set the broadcast address */
835                 ret = 0;
836                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
837                         inet_del_ifa(in_dev, ifap, 0);
838                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
839                         inet_insert_ifa(ifa);
840                 }
841                 break;
842
843         case SIOCSIFDSTADDR:    /* Set the destination address */
844                 ret = 0;
845                 if (ifa->ifa_address == sin->sin_addr.s_addr)
846                         break;
847                 ret = -EINVAL;
848                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
849                         break;
850                 ret = 0;
851                 inet_del_ifa(in_dev, ifap, 0);
852                 ifa->ifa_address = sin->sin_addr.s_addr;
853                 inet_insert_ifa(ifa);
854                 break;
855
856         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
857
858                 /*
859                  *      The mask we set must be legal.
860                  */
861                 ret = -EINVAL;
862                 if (bad_mask(sin->sin_addr.s_addr, 0))
863                         break;
864                 ret = 0;
865                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
866                         __be32 old_mask = ifa->ifa_mask;
867                         inet_del_ifa(in_dev, ifap, 0);
868                         ifa->ifa_mask = sin->sin_addr.s_addr;
869                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
870
871                         /* See if current broadcast address matches
872                          * with current netmask, then recalculate
873                          * the broadcast address. Otherwise it's a
874                          * funny address, so don't touch it since
875                          * the user seems to know what (s)he's doing...
876                          */
877                         if ((dev->flags & IFF_BROADCAST) &&
878                             (ifa->ifa_prefixlen < 31) &&
879                             (ifa->ifa_broadcast ==
880                              (ifa->ifa_local|~old_mask))) {
881                                 ifa->ifa_broadcast = (ifa->ifa_local |
882                                                       ~sin->sin_addr.s_addr);
883                         }
884                         inet_insert_ifa(ifa);
885                 }
886                 break;
887         }
888 done:
889         rtnl_unlock();
890 out:
891         return ret;
892 rarok:
893         rtnl_unlock();
894         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
895         goto out;
896 }
897
898 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
899 {
900         struct in_device *in_dev = __in_dev_get_rtnl(dev);
901         struct in_ifaddr *ifa;
902         struct ifreq ifr;
903         int done = 0;
904
905         if (!in_dev)
906                 goto out;
907
908         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
909                 if (!buf) {
910                         done += sizeof(ifr);
911                         continue;
912                 }
913                 if (len < (int) sizeof(ifr))
914                         break;
915                 memset(&ifr, 0, sizeof(struct ifreq));
916                 if (ifa->ifa_label)
917                         strcpy(ifr.ifr_name, ifa->ifa_label);
918                 else
919                         strcpy(ifr.ifr_name, dev->name);
920
921                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
922                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
923                                                                 ifa->ifa_local;
924
925                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
926                         done = -EFAULT;
927                         break;
928                 }
929                 buf  += sizeof(struct ifreq);
930                 len  -= sizeof(struct ifreq);
931                 done += sizeof(struct ifreq);
932         }
933 out:
934         return done;
935 }
936
937 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
938 {
939         __be32 addr = 0;
940         struct in_device *in_dev;
941         struct net *net = dev_net(dev);
942
943         rcu_read_lock();
944         in_dev = __in_dev_get_rcu(dev);
945         if (!in_dev)
946                 goto no_in_dev;
947
948         for_primary_ifa(in_dev) {
949                 if (ifa->ifa_scope > scope)
950                         continue;
951                 if (!dst || inet_ifa_match(dst, ifa)) {
952                         addr = ifa->ifa_local;
953                         break;
954                 }
955                 if (!addr)
956                         addr = ifa->ifa_local;
957         } endfor_ifa(in_dev);
958
959         if (addr)
960                 goto out_unlock;
961 no_in_dev:
962
963         /* Not loopback addresses on loopback should be preferred
964            in this case. It is importnat that lo is the first interface
965            in dev_base list.
966          */
967         for_each_netdev_rcu(net, dev) {
968                 in_dev = __in_dev_get_rcu(dev);
969                 if (!in_dev)
970                         continue;
971
972                 for_primary_ifa(in_dev) {
973                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
974                             ifa->ifa_scope <= scope) {
975                                 addr = ifa->ifa_local;
976                                 goto out_unlock;
977                         }
978                 } endfor_ifa(in_dev);
979         }
980 out_unlock:
981         rcu_read_unlock();
982         return addr;
983 }
984 EXPORT_SYMBOL(inet_select_addr);
985
/*
 * Search the addresses of one in_device for a local address that satisfies
 * the (dst, local, scope) wildcards described at inet_confirm_addr().
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - candidate local address: the first ifa_local that equals @local
 *          (any ifa_local when @local is 0) and whose scope value does not
 *          exceed @scope;
 *   same - set once an ifa has been seen whose subnet contains @local
 *          (if non-zero) and @dst (if non-zero).
 *
 * Returns the candidate address only if "same" is set when the walk ends,
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* No candidate yet: does this ifa provide one? */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* Subnet condition already met earlier: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /*
                                 * With an explicit local address, or no
                                 * dst restriction, the candidate stands.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1022
1023 /*
1024  * Confirm that local IP address exists using wildcards:
1025  * - in_dev: only on this interface, 0=any interface
1026  * - dst: only in the same subnet as dst, 0=any dst
1027  * - local: address, 0=autoselect the local address
1028  * - scope: maximum allowed scope value for the local address
1029  */
1030 __be32 inet_confirm_addr(struct in_device *in_dev,
1031                          __be32 dst, __be32 local, int scope)
1032 {
1033         __be32 addr = 0;
1034         struct net_device *dev;
1035         struct net *net;
1036
1037         if (scope != RT_SCOPE_LINK)
1038                 return confirm_addr_indev(in_dev, dst, local, scope);
1039
1040         net = dev_net(in_dev->dev);
1041         rcu_read_lock();
1042         for_each_netdev_rcu(net, dev) {
1043                 in_dev = __in_dev_get_rcu(dev);
1044                 if (in_dev) {
1045                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1046                         if (addr)
1047                                 break;
1048                 }
1049         }
1050         rcu_read_unlock();
1051
1052         return addr;
1053 }
1054
1055 /*
1056  *      Device notifier
1057  */
1058
1059 int register_inetaddr_notifier(struct notifier_block *nb)
1060 {
1061         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1062 }
1063 EXPORT_SYMBOL(register_inetaddr_notifier);
1064
1065 int unregister_inetaddr_notifier(struct notifier_block *nb)
1066 {
1067         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1068 }
1069 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1070
1071 /* Rename ifa_labels for a device name change. Make some effort to preserve
1072  * existing alias numbering and to create unique labels if possible.
1073 */
1074 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1075 {
1076         struct in_ifaddr *ifa;
1077         int named = 0;
1078
1079         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1080                 char old[IFNAMSIZ], *dot;
1081
1082                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1083                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1084                 if (named++ == 0)
1085                         goto skip;
1086                 dot = strchr(old, ':');
1087                 if (dot == NULL) {
1088                         sprintf(old, ":%d", named);
1089                         dot = old;
1090                 }
1091                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1092                         strcat(ifa->ifa_label, dot);
1093                 else
1094                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1095 skip:
1096                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1097         }
1098 }
1099
/* An MTU is usable for IPv4 here only if it is at least 68 bytes. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1104
1105 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1106                                         struct in_device *in_dev)
1107
1108 {
1109         struct in_ifaddr *ifa = in_dev->ifa_list;
1110
1111         if (!ifa)
1112                 return;
1113
1114         arp_send(ARPOP_REQUEST, ETH_P_ARP,
1115                  ifa->ifa_local, dev,
1116                  ifa->ifa_local, NULL,
1117                  dev->dev_addr, NULL);
1118 }
1119
1120 /* Called only under RTNL semaphore */
1121
1122 static int inetdev_event(struct notifier_block *this, unsigned long event,
1123                          void *ptr)
1124 {
1125         struct net_device *dev = ptr;
1126         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1127
1128         ASSERT_RTNL();
1129
1130         if (!in_dev) {
1131                 if (event == NETDEV_REGISTER) {
1132                         in_dev = inetdev_init(dev);
1133                         if (!in_dev)
1134                                 return notifier_from_errno(-ENOMEM);
1135                         if (dev->flags & IFF_LOOPBACK) {
1136                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1137                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1138                         }
1139                 } else if (event == NETDEV_CHANGEMTU) {
1140                         /* Re-enabling IP */
1141                         if (inetdev_valid_mtu(dev->mtu))
1142                                 in_dev = inetdev_init(dev);
1143                 }
1144                 goto out;
1145         }
1146
1147         switch (event) {
1148         case NETDEV_REGISTER:
1149                 printk(KERN_DEBUG "inetdev_event: bug\n");
1150                 rcu_assign_pointer(dev->ip_ptr, NULL);
1151                 break;
1152         case NETDEV_UP:
1153                 if (!inetdev_valid_mtu(dev->mtu))
1154                         break;
1155                 if (dev->flags & IFF_LOOPBACK) {
1156                         struct in_ifaddr *ifa = inet_alloc_ifa();
1157
1158                         if (ifa) {
1159                                 INIT_HLIST_NODE(&ifa->hash);
1160                                 ifa->ifa_local =
1161                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1162                                 ifa->ifa_prefixlen = 8;
1163                                 ifa->ifa_mask = inet_make_mask(8);
1164                                 in_dev_hold(in_dev);
1165                                 ifa->ifa_dev = in_dev;
1166                                 ifa->ifa_scope = RT_SCOPE_HOST;
1167                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1168                                 inet_insert_ifa(ifa);
1169                         }
1170                 }
1171                 ip_mc_up(in_dev);
1172                 /* fall through */
1173         case NETDEV_CHANGEADDR:
1174                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1175                         break;
1176                 /* fall through */
1177         case NETDEV_NOTIFY_PEERS:
1178                 /* Send gratuitous ARP to notify of link change */
1179                 inetdev_send_gratuitous_arp(dev, in_dev);
1180                 break;
1181         case NETDEV_DOWN:
1182                 ip_mc_down(in_dev);
1183                 break;
1184         case NETDEV_PRE_TYPE_CHANGE:
1185                 ip_mc_unmap(in_dev);
1186                 break;
1187         case NETDEV_POST_TYPE_CHANGE:
1188                 ip_mc_remap(in_dev);
1189                 break;
1190         case NETDEV_CHANGEMTU:
1191                 if (inetdev_valid_mtu(dev->mtu))
1192                         break;
1193                 /* disable IP when MTU is not enough */
1194         case NETDEV_UNREGISTER:
1195                 inetdev_destroy(in_dev);
1196                 break;
1197         case NETDEV_CHANGENAME:
1198                 /* Do not notify about label change, this event is
1199                  * not interesting to applications using netlink.
1200                  */
1201                 inetdev_changename(dev, in_dev);
1202
1203                 devinet_sysctl_unregister(in_dev);
1204                 devinet_sysctl_register(in_dev);
1205                 break;
1206         }
1207 out:
1208         return NOTIFY_DONE;
1209 }
1210
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1214
1215 static inline size_t inet_nlmsg_size(void)
1216 {
1217         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1218                + nla_total_size(4) /* IFA_ADDRESS */
1219                + nla_total_size(4) /* IFA_LOCAL */
1220                + nla_total_size(4) /* IFA_BROADCAST */
1221                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1222 }
1223
1224 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1225                             u32 pid, u32 seq, int event, unsigned int flags)
1226 {
1227         struct ifaddrmsg *ifm;
1228         struct nlmsghdr  *nlh;
1229
1230         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1231         if (nlh == NULL)
1232                 return -EMSGSIZE;
1233
1234         ifm = nlmsg_data(nlh);
1235         ifm->ifa_family = AF_INET;
1236         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1237         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1238         ifm->ifa_scope = ifa->ifa_scope;
1239         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1240
1241         if (ifa->ifa_address)
1242                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1243
1244         if (ifa->ifa_local)
1245                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1246
1247         if (ifa->ifa_broadcast)
1248                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1249
1250         if (ifa->ifa_label[0])
1251                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1252
1253         return nlmsg_end(skb, nlh);
1254
1255 nla_put_failure:
1256         nlmsg_cancel(skb, nlh);
1257         return -EMSGSIZE;
1258 }
1259
/*
 * Netlink dump callback: emit an RTM_NEWADDR message for each IPv4 address
 * of each device in the socket's namespace.
 *
 * The position reached is kept in cb->args so a later call can resume:
 *   args[0] - device-index hash bucket
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;
        struct hlist_node *node;

        /* Resume point saved by the previous invocation. */
        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx only applies to the first bucket visited; reset afterwards. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
                        /* Skip devices handled by an earlier call. */
                        if (idx < s_idx)
                                goto cont;
                        /* Past the resume device: start at its first address. */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                /* Stop here and record the position on failure. */
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).pid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        rcu_read_unlock();
                                        goto done;
                                }
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Save the cursor for the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1314
1315 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1316                       u32 pid)
1317 {
1318         struct sk_buff *skb;
1319         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1320         int err = -ENOBUFS;
1321         struct net *net;
1322
1323         net = dev_net(ifa->ifa_dev->dev);
1324         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1325         if (skb == NULL)
1326                 goto errout;
1327
1328         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1329         if (err < 0) {
1330                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1331                 WARN_ON(err == -EMSGSIZE);
1332                 kfree_skb(skb);
1333                 goto errout;
1334         }
1335         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1336         return;
1337 errout:
1338         if (err < 0)
1339                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1340 }
1341
1342 static size_t inet_get_link_af_size(const struct net_device *dev)
1343 {
1344         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345
1346         if (!in_dev)
1347                 return 0;
1348
1349         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1350 }
1351
1352 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1353 {
1354         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1355         struct nlattr *nla;
1356         int i;
1357
1358         if (!in_dev)
1359                 return -ENODATA;
1360
1361         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1362         if (nla == NULL)
1363                 return -EMSGSIZE;
1364
1365         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1366                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1367
1368         return 0;
1369 }
1370
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must be
 * a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1374
1375 static int inet_validate_link_af(const struct net_device *dev,
1376                                  const struct nlattr *nla)
1377 {
1378         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1379         int err, rem;
1380
1381         if (dev && !__in_dev_get_rtnl(dev))
1382                 return -EAFNOSUPPORT;
1383
1384         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1385         if (err < 0)
1386                 return err;
1387
1388         if (tb[IFLA_INET_CONF]) {
1389                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1390                         int cfgid = nla_type(a);
1391
1392                         if (nla_len(a) < 4)
1393                                 return -EINVAL;
1394
1395                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1396                                 return -EINVAL;
1397                 }
1398         }
1399
1400         return 0;
1401 }
1402
1403 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1404 {
1405         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1406         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407         int rem;
1408
1409         if (!in_dev)
1410                 return -EAFNOSUPPORT;
1411
1412         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1413                 BUG();
1414
1415         if (tb[IFLA_INET_CONF]) {
1416                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1417                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1418         }
1419
1420         return 0;
1421 }
1422
1423 #ifdef CONFIG_SYSCTL
1424
1425 static void devinet_copy_dflt_conf(struct net *net, int i)
1426 {
1427         struct net_device *dev;
1428
1429         rcu_read_lock();
1430         for_each_netdev_rcu(net, dev) {
1431                 struct in_device *in_dev;
1432
1433                 in_dev = __in_dev_get_rcu(dev);
1434                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1435                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1436         }
1437         rcu_read_unlock();
1438 }
1439
1440 /* called with RTNL locked */
1441 static void inet_forward_change(struct net *net)
1442 {
1443         struct net_device *dev;
1444         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1445
1446         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1447         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1448
1449         for_each_netdev(net, dev) {
1450                 struct in_device *in_dev;
1451                 if (on)
1452                         dev_disable_lro(dev);
1453                 rcu_read_lock();
1454                 in_dev = __in_dev_get_rcu(dev);
1455                 if (in_dev)
1456                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1457                 rcu_read_unlock();
1458         }
1459 }
1460
1461 static int devinet_conf_proc(ctl_table *ctl, int write,
1462                              void __user *buffer,
1463                              size_t *lenp, loff_t *ppos)
1464 {
1465         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1466
1467         if (write) {
1468                 struct ipv4_devconf *cnf = ctl->extra1;
1469                 struct net *net = ctl->extra2;
1470                 int i = (int *)ctl->data - cnf->data;
1471
1472                 set_bit(i, cnf->state);
1473
1474                 if (cnf == net->ipv4.devconf_dflt)
1475                         devinet_copy_dflt_conf(net, i);
1476         }
1477
1478         return ret;
1479 }
1480
1481 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1482                                   void __user *buffer,
1483                                   size_t *lenp, loff_t *ppos)
1484 {
1485         int *valp = ctl->data;
1486         int val = *valp;
1487         loff_t pos = *ppos;
1488         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489
1490         if (write && *valp != val) {
1491                 struct net *net = ctl->extra2;
1492
1493                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1494                         if (!rtnl_trylock()) {
1495                                 /* Restore the original values before restarting */
1496                                 *valp = val;
1497                                 *ppos = pos;
1498                                 return restart_syscall();
1499                         }
1500                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1501                                 inet_forward_change(net);
1502                         } else if (*valp) {
1503                                 struct ipv4_devconf *cnf = ctl->extra1;
1504                                 struct in_device *idev =
1505                                         container_of(cnf, struct in_device, cnf);
1506                                 dev_disable_lro(idev->dev);
1507                         }
1508                         rtnl_unlock();
1509                         rt_cache_flush(net, 0);
1510                 }
1511         }
1512
1513         return ret;
1514 }
1515
1516 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1517                                 void __user *buffer,
1518                                 size_t *lenp, loff_t *ppos)
1519 {
1520         int *valp = ctl->data;
1521         int val = *valp;
1522         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1523         struct net *net = ctl->extra2;
1524
1525         if (write && *valp != val)
1526                 rt_cache_flush(net, 0);
1527
1528         return ret;
1529 }
1530
/*
 * Build one ctl_table initializer for the devconf attribute @attr.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global ipv4_devconf
 * and .extra1 at ipv4_devconf itself; __devinet_sysctl_register() rebases
 * both in its private copy of the table.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1553
/*
 * Template for the per-devconf sysctl tables.  __devinet_sysctl_register()
 * duplicates it with kmemdup() and retargets the copy's entries at the
 * ipv4_devconf being registered; sysctl_header and dev_name are filled in
 * on that copy.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
        char *dev_name;
} devinet_sysctl = {
        .devinet_vars = {
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* Entries whose change also flushes the route cache. */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
        },
};
1593
1594 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1595                                         struct ipv4_devconf *p)
1596 {
1597         int i;
1598         struct devinet_sysctl_table *t;
1599
1600 #define DEVINET_CTL_PATH_DEV    3
1601
1602         struct ctl_path devinet_ctl_path[] = {
1603                 { .procname = "net",  },
1604                 { .procname = "ipv4", },
1605                 { .procname = "conf", },
1606                 { /* to be set */ },
1607                 { },
1608         };
1609
1610         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1611         if (!t)
1612                 goto out;
1613
1614         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1615                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1616                 t->devinet_vars[i].extra1 = p;
1617                 t->devinet_vars[i].extra2 = net;
1618         }
1619
1620         /*
1621          * Make a copy of dev_name, because '.procname' is regarded as const
1622          * by sysctl and we wouldn't want anyone to change it under our feet
1623          * (see SIOCSIFNAME).
1624          */
1625         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1626         if (!t->dev_name)
1627                 goto free;
1628
1629         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1630
1631         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1632                         t->devinet_vars);
1633         if (!t->sysctl_header)
1634                 goto free_procname;
1635
1636         p->sysctl = t;
1637         return 0;
1638
1639 free_procname:
1640         kfree(t->dev_name);
1641 free:
1642         kfree(t);
1643 out:
1644         return -ENOBUFS;
1645 }
1646
1647 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1648 {
1649         struct devinet_sysctl_table *t = cnf->sysctl;
1650
1651         if (t == NULL)
1652                 return;
1653
1654         cnf->sysctl = NULL;
1655         unregister_sysctl_table(t->sysctl_header);
1656         kfree(t->dev_name);
1657         kfree(t);
1658 }
1659
1660 static void devinet_sysctl_register(struct in_device *idev)
1661 {
1662         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1663         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1664                                         &idev->cnf);
1665 }
1666
1667 static void devinet_sysctl_unregister(struct in_device *idev)
1668 {
1669         __devinet_sysctl_unregister(&idev->cnf);
1670         neigh_sysctl_unregister(idev->arp_parms);
1671 }
1672
/*
 * "ip_forward" sysctl table.  As written it targets the FORWARDING slot of
 * the global ipv4_devconf and init_net; devinet_init_net() duplicates it
 * and repoints data/extra1/extra2 for other namespaces.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
1686
/* sysctl path "net/ipv4" under which devinet_init_net() registers ip_forward. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
        { .procname = "net", },
        { .procname = "ipv4", },
        { },
};
1692 #endif
1693
/*
 * Per-namespace initialisation of the IPv4 device configuration.
 *
 * init_net uses the global ipv4_devconf / ipv4_devconf_dflt (and, with
 * CONFIG_SYSCTL, the static ctl_forward_entry table) directly; any other
 * namespace gets kmemdup()ed copies.  With CONFIG_SYSCTL the "all" and
 * "default" conf directories and the ip_forward entry are registered.
 *
 * Returns 0 on success or a negative errno; on failure everything done so
 * far is unwound through the labels at the end.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Other namespaces start from copies of the global settings. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Error unwinding; only dynamically allocated copies are freed. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
1765
1766 static __net_exit void devinet_exit_net(struct net *net)
1767 {
1768 #ifdef CONFIG_SYSCTL
1769         struct ctl_table *tbl;
1770
1771         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1772         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1773         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1774         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1775         kfree(tbl);
1776 #endif
1777         kfree(net->ipv4.devconf_dflt);
1778         kfree(net->ipv4.devconf_all);
1779 }
1780
1781 static __net_initdata struct pernet_operations devinet_ops = {
1782         .init = devinet_init_net,
1783         .exit = devinet_exit_net,
1784 };
1785
1786 static struct rtnl_af_ops inet_af_ops = {
1787         .family           = AF_INET,
1788         .fill_link_af     = inet_fill_link_af,
1789         .get_link_af_size = inet_get_link_af_size,
1790         .validate_link_af = inet_validate_link_af,
1791         .set_link_af      = inet_set_link_af,
1792 };
1793
1794 void __init devinet_init(void)
1795 {
1796         int i;
1797
1798         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1799                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1800
1801         register_pernet_subsys(&devinet_ops);
1802
1803         register_gifconf(PF_INET, inet_gifconf);
1804         register_netdevice_notifier(&ip_netdev_notifier);
1805
1806         rtnl_af_register(&inet_af_ops);
1807
1808         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1809         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1810         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1811 }
1812