e229a3bc345dc4138a188282c4ab4f1717882832
[cascardo/linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv() to find or create a route-info route. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 0,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 void rt6_bind_peer(struct rt6_info *rt, int create)
325 {
326         struct inet_peer_base *base;
327         struct inet_peer *peer;
328
329         base = inetpeer_base_ptr(rt->_rt6i_peer);
330         if (!base)
331                 return;
332
333         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
334         if (peer) {
335                 if (!rt6_set_peer(rt, peer))
336                         inet_putpeer(peer);
337         }
338 }
339
340 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
341                            int how)
342 {
343         struct rt6_info *rt = (struct rt6_info *)dst;
344         struct inet6_dev *idev = rt->rt6i_idev;
345         struct net_device *loopback_dev =
346                 dev_net(dev)->loopback_dev;
347
348         if (dev != loopback_dev) {
349                 if (idev && idev->dev == dev) {
350                         struct inet6_dev *loopback_idev =
351                                 in6_dev_get(loopback_dev);
352                         if (loopback_idev) {
353                                 rt->rt6i_idev = loopback_idev;
354                                 in6_dev_put(idev);
355                         }
356                 }
357                 if (rt->n && rt->n->dev == dev) {
358                         rt->n->dev = loopback_dev;
359                         dev_hold(loopback_dev);
360                         dev_put(dev);
361                 }
362         }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367         if (rt->rt6i_flags & RTF_EXPIRES) {
368                 if (time_after(jiffies, rt->dst.expires))
369                         return true;
370         } else if (rt->dst.from) {
371                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372         }
373         return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378         return ipv6_addr_type(daddr) &
379                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383  *   Hash based function using packet header and flowlabel.
384  * Adapted from fib_info_hashfn()
385  */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387                                const struct flowi6 *fl6)
388 {
389         unsigned int val = fl6->flowi6_proto;
390
391         val ^= (__force u32)fl6->daddr.s6_addr32[0];
392         val ^= (__force u32)fl6->daddr.s6_addr32[1];
393         val ^= (__force u32)fl6->daddr.s6_addr32[2];
394         val ^= (__force u32)fl6->daddr.s6_addr32[3];
395
396         val ^= (__force u32)fl6->saddr.s6_addr32[0];
397         val ^= (__force u32)fl6->saddr.s6_addr32[1];
398         val ^= (__force u32)fl6->saddr.s6_addr32[2];
399         val ^= (__force u32)fl6->saddr.s6_addr32[3];
400
401         /* Work only if this not encapsulated */
402         switch (fl6->flowi6_proto) {
403         case IPPROTO_UDP:
404         case IPPROTO_TCP:
405         case IPPROTO_SCTP:
406                 val ^= (__force u16)fl6->fl6_sport;
407                 val ^= (__force u16)fl6->fl6_dport;
408                 break;
409
410         case IPPROTO_ICMPV6:
411                 val ^= (__force u16)fl6->fl6_icmp_type;
412                 val ^= (__force u16)fl6->fl6_icmp_code;
413                 break;
414         }
415         /* RFC6438 recommands to use flowlabel */
416         val ^= (__force u32)fl6->flowlabel;
417
418         /* Perhaps, we need to tune, this function? */
419         val = val ^ (val >> 7) ^ (val >> 12);
420         return val % candidate_count;
421 }
422
423 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
424                                              struct flowi6 *fl6)
425 {
426         struct rt6_info *sibling, *next_sibling;
427         int route_choosen;
428
429         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
430         /* Don't change the route, if route_choosen == 0
431          * (siblings does not include ourself)
432          */
433         if (route_choosen)
434                 list_for_each_entry_safe(sibling, next_sibling,
435                                 &match->rt6i_siblings, rt6i_siblings) {
436                         route_choosen--;
437                         if (route_choosen == 0) {
438                                 match = sibling;
439                                 break;
440                         }
441                 }
442         return match;
443 }
444
445 /*
446  *      Route lookup. Any table->tb6_lock is implied.
447  */
448
449 static inline struct rt6_info *rt6_device_match(struct net *net,
450                                                     struct rt6_info *rt,
451                                                     const struct in6_addr *saddr,
452                                                     int oif,
453                                                     int flags)
454 {
455         struct rt6_info *local = NULL;
456         struct rt6_info *sprt;
457
458         if (!oif && ipv6_addr_any(saddr))
459                 goto out;
460
461         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
462                 struct net_device *dev = sprt->dst.dev;
463
464                 if (oif) {
465                         if (dev->ifindex == oif)
466                                 return sprt;
467                         if (dev->flags & IFF_LOOPBACK) {
468                                 if (!sprt->rt6i_idev ||
469                                     sprt->rt6i_idev->dev->ifindex != oif) {
470                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
471                                                 continue;
472                                         if (local && (!oif ||
473                                                       local->rt6i_idev->dev->ifindex == oif))
474                                                 continue;
475                                 }
476                                 local = sprt;
477                         }
478                 } else {
479                         if (ipv6_chk_addr(net, saddr, dev,
480                                           flags & RT6_LOOKUP_F_IFACE))
481                                 return sprt;
482                 }
483         }
484
485         if (oif) {
486                 if (local)
487                         return local;
488
489                 if (flags & RT6_LOOKUP_F_IFACE)
490                         return net->ipv6.ip6_null_entry;
491         }
492 out:
493         return rt;
494 }
495
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour bound to @rt is not in a NUD_VALID state and more than
 * the interface's rtr_probe_interval has passed since neigh->updated,
 * stamp neigh->updated and send a Neighbour Solicitation for the
 * neighbour's address to its solicited-node multicast address.
 *
 * Okay, this does not seem to be appropriate for now, however, we need to
 * check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * neigh->updated is written here, so neigh->lock is taken for writing.
 * Holding it only for reading would let several CPUs pass the time check
 * concurrently, each updating the timestamp and each sending a probe,
 * which defeats the rate limit.
 *
 * @rt may be NULL; a route without a bound neighbour is ignored too.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;

	/* Cheap unlocked test first; repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding neigh->lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
532
533 /*
534  * Default Router Selection (RFC 2461 6.3.6)
535  */
536 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
537 {
538         struct net_device *dev = rt->dst.dev;
539         if (!oif || dev->ifindex == oif)
540                 return 2;
541         if ((dev->flags & IFF_LOOPBACK) &&
542             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
543                 return 1;
544         return 0;
545 }
546
547 static inline bool rt6_check_neigh(struct rt6_info *rt)
548 {
549         struct neighbour *neigh;
550         bool ret = false;
551
552         neigh = rt->n;
553         if (rt->rt6i_flags & RTF_NONEXTHOP ||
554             !(rt->rt6i_flags & RTF_GATEWAY))
555                 ret = true;
556         else if (neigh) {
557                 read_lock_bh(&neigh->lock);
558                 if (neigh->nud_state & NUD_VALID)
559                         ret = true;
560 #ifdef CONFIG_IPV6_ROUTER_PREF
561                 else if (!(neigh->nud_state & NUD_FAILED))
562                         ret = true;
563 #endif
564                 read_unlock_bh(&neigh->lock);
565         }
566         return ret;
567 }
568
569 static int rt6_score_route(struct rt6_info *rt, int oif,
570                            int strict)
571 {
572         int m;
573
574         m = rt6_check_dev(rt, oif);
575         if (!m && (strict & RT6_LOOKUP_F_IFACE))
576                 return -1;
577 #ifdef CONFIG_IPV6_ROUTER_PREF
578         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
579 #endif
580         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
581                 return -1;
582         return m;
583 }
584
585 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
586                                    int *mpri, struct rt6_info *match)
587 {
588         int m;
589
590         if (rt6_check_expired(rt))
591                 goto out;
592
593         m = rt6_score_route(rt, oif, strict);
594         if (m < 0)
595                 goto out;
596
597         if (m > *mpri) {
598                 if (strict & RT6_LOOKUP_F_REACHABLE)
599                         rt6_probe(match);
600                 *mpri = m;
601                 match = rt;
602         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
603                 rt6_probe(rt);
604         }
605
606 out:
607         return match;
608 }
609
610 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
611                                      struct rt6_info *rr_head,
612                                      u32 metric, int oif, int strict)
613 {
614         struct rt6_info *rt, *match;
615         int mpri = -1;
616
617         match = NULL;
618         for (rt = rr_head; rt && rt->rt6i_metric == metric;
619              rt = rt->dst.rt6_next)
620                 match = find_match(rt, oif, strict, &mpri, match);
621         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
622              rt = rt->dst.rt6_next)
623                 match = find_match(rt, oif, strict, &mpri, match);
624
625         return match;
626 }
627
628 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
629 {
630         struct rt6_info *match, *rt0;
631         struct net *net;
632
633         rt0 = fn->rr_ptr;
634         if (!rt0)
635                 fn->rr_ptr = rt0 = fn->leaf;
636
637         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
638
639         if (!match &&
640             (strict & RT6_LOOKUP_F_REACHABLE)) {
641                 struct rt6_info *next = rt0->dst.rt6_next;
642
643                 /* no entries matched; do round-robin */
644                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
645                         next = fn->leaf;
646
647                 if (next != rt0)
648                         fn->rr_ptr = next;
649         }
650
651         net = dev_net(rt0->dst.dev);
652         return match ? match : net->ipv6.ip6_null_entry;
653 }
654
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev from router
 * @gwaddr.
 *
 * @opt points at the option and @len is its length in bytes.  After
 * validating the option, the matching route-info route is looked up; it
 * is deleted when the advertised lifetime is zero, created when missing
 * and the lifetime is non-zero, or has its preference refreshed
 * otherwise.  The route's expiry is then set from the lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf;
	struct in6_addr *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Option carries a full 128-bit prefix: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (!lifetime) {
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) |
					 RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
727
/*
 * BACKTRACK - climb the fib6 tree when a lookup ended on the null entry.
 *
 * This macro is not self-contained.  The expanding function must provide:
 *   - a local 'struct rt6_info *rt' holding the current lookup result,
 *   - a local 'struct fib6_node *fn' holding the current node,
 *   - a label 'out', jumped to when the top-level root is reached,
 *   - a label 'restart', jumped to when a node with RTN_RTINFO is found.
 *
 * If rt is not the namespace's null entry the macro does nothing.
 * Otherwise it moves to the parent node, or, when the parent has a
 * subtree other than fn, to the result of looking @saddr up in that
 * subtree, and repeats until one of the two labels is taken.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
745
746 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
747                                              struct fib6_table *table,
748                                              struct flowi6 *fl6, int flags)
749 {
750         struct fib6_node *fn;
751         struct rt6_info *rt;
752
753         read_lock_bh(&table->tb6_lock);
754         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
755 restart:
756         rt = fn->leaf;
757         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
758         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
759                 rt = rt6_multipath_select(rt, fl6);
760         BACKTRACK(net, &fl6->saddr);
761 out:
762         dst_use(&rt->dst, jiffies);
763         read_unlock_bh(&table->tb6_lock);
764         return rt;
765
766 }
767
768 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
769                                     int flags)
770 {
771         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
772 }
773 EXPORT_SYMBOL_GPL(ip6_route_lookup);
774
775 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
776                             const struct in6_addr *saddr, int oif, int strict)
777 {
778         struct flowi6 fl6 = {
779                 .flowi6_oif = oif,
780                 .daddr = *daddr,
781         };
782         struct dst_entry *dst;
783         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
784
785         if (saddr) {
786                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
787                 flags |= RT6_LOOKUP_F_HAS_SADDR;
788         }
789
790         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
791         if (dst->error == 0)
792                 return (struct rt6_info *) dst;
793
794         dst_release(dst);
795
796         return NULL;
797 }
798
799 EXPORT_SYMBOL(rt6_lookup);
800
801 /* ip6_ins_rt is called with FREE table->tb6_lock.
802    It takes new route entry, the addition fails by any reason the
803    route is freed. In any case, if caller does not hold it, it may
804    be destroyed.
805  */
806
807 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
808 {
809         int err;
810         struct fib6_table *table;
811
812         table = rt->rt6i_table;
813         write_lock_bh(&table->tb6_lock);
814         err = fib6_add(&table->tb6_root, rt, info);
815         write_unlock_bh(&table->tb6_lock);
816
817         return err;
818 }
819
820 int ip6_ins_rt(struct rt6_info *rt)
821 {
822         struct nl_info info = {
823                 .nl_net = dev_net(rt->dst.dev),
824         };
825         return __ip6_ins_rt(rt, &info);
826 }
827
828 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
829                                       const struct in6_addr *daddr,
830                                       const struct in6_addr *saddr)
831 {
832         struct rt6_info *rt;
833
834         /*
835          *      Clone the route.
836          */
837
838         rt = ip6_rt_copy(ort, daddr);
839
840         if (rt) {
841                 int attempts = !in_softirq();
842
843                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
844                         if (ort->rt6i_dst.plen != 128 &&
845                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
846                                 rt->rt6i_flags |= RTF_ANYCAST;
847                         rt->rt6i_gateway = *daddr;
848                 }
849
850                 rt->rt6i_flags |= RTF_CACHE;
851
852 #ifdef CONFIG_IPV6_SUBTREES
853                 if (rt->rt6i_src.plen && saddr) {
854                         rt->rt6i_src.addr = *saddr;
855                         rt->rt6i_src.plen = 128;
856                 }
857 #endif
858
859         retry:
860                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
861                         struct net *net = dev_net(rt->dst.dev);
862                         int saved_rt_min_interval =
863                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
864                         int saved_rt_elasticity =
865                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
866
867                         if (attempts-- > 0) {
868                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
869                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
870
871                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
872
873                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
874                                         saved_rt_elasticity;
875                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
876                                         saved_rt_min_interval;
877                                 goto retry;
878                         }
879
880                         net_warn_ratelimited("Neighbour table overflow\n");
881                         dst_free(&rt->dst);
882                         return NULL;
883                 }
884         }
885
886         return rt;
887 }
888
889 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
890                                         const struct in6_addr *daddr)
891 {
892         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
893
894         if (rt) {
895                 rt->rt6i_flags |= RTF_CACHE;
896                 rt->n = neigh_clone(ort->n);
897         }
898         return rt;
899 }
900
/*
 * Core route lookup shared by ip6_pol_route_input() and
 * ip6_pol_route_output().
 *
 * Looks up fl6's destination/source in @table and selects a route for
 * interface @oif.  If the selected route is neither ip6_null_entry nor
 * already an RTF_CACHE entry, a per-destination copy is made with
 * rt6_alloc_cow() or rt6_alloc_clone() and inserted with ip6_ins_rt();
 * when that does not succeed the whole lookup is repeated, bounded by
 * 'attempts'.
 *
 * Every exit path takes a reference with dst_hold() on the route that is
 * returned.  When the copy cannot be allocated the per-namespace
 * ip6_null_entry is returned in its place.  lastuse and __use of the
 * returned route are updated before returning.
 */
901 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
902                                       struct flowi6 *fl6, int flags)
903 {
904         struct fib6_node *fn;
905         struct rt6_info *rt, *nrt;
906         int strict = 0;
907         int attempts = 3;
908         int err;
            /* First pass restricts rt6_select() with RT6_LOOKUP_F_REACHABLE,
             * unless forwarding is enabled in devconf_all.  The restriction
             * is dropped at 'out' for a second pass.
             */
909         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
910
911         strict |= flags & RT6_LOOKUP_F_IFACE;
912
913 relookup:
914         read_lock_bh(&table->tb6_lock);
915
916 restart_2:
917         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
918
919 restart:
920         rt = rt6_select(fn, oif, strict | reachable);
921         if (rt->rt6i_nsiblings && oif == 0)
922                 rt = rt6_multipath_select(rt, fl6);
            /* BACKTRACK() is a macro defined outside this function.
             * NOTE(review): it presumably walks to a less specific node and
             * jumps to 'restart' or 'out' - confirm against its definition.
             */
923         BACKTRACK(net, &fl6->saddr);
924         if (rt == net->ipv6.ip6_null_entry ||
925             rt->rt6i_flags & RTF_CACHE)
926                 goto out;
927
            /* Pin rt before dropping the table lock; the copy helpers below
             * run without tb6_lock held.
             */
928         dst_hold(&rt->dst);
929         read_unlock_bh(&table->tb6_lock);
930
931         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
932                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
933         else if (!(rt->dst.flags & DST_HOST))
934                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
935         else
                    /* Return rt itself, keeping the reference taken above. */
936                 goto out2;
937
            /* Drop the reference on the original; from here on rt is the
             * copy, or ip6_null_entry if the copy could not be allocated.
             */
938         ip6_rt_put(rt);
939         rt = nrt ? : net->ipv6.ip6_null_entry;
940
            /* This is the reference handed back to the caller. */
941         dst_hold(&rt->dst);
942         if (nrt) {
943                 err = ip6_ins_rt(nrt);
944                 if (!err)
945                         goto out2;
946         }
947
948         if (--attempts <= 0)
949                 goto out2;
950
951         /*
952          * Race condition! In the gap, when table->tb6_lock was
953          * released someone could insert this route.  Relookup.
954          */
955         ip6_rt_put(rt);
956         goto relookup;
957
958 out:
            /* Reached with tb6_lock still held, for ip6_null_entry or an
             * RTF_CACHE route.  If the pass that got here was restricted to
             * reachable routers, redo the lookup once without that
             * restriction.
             */
959         if (reachable) {
960                 reachable = 0;
961                 goto restart_2;
962         }
963         dst_hold(&rt->dst);
964         read_unlock_bh(&table->tb6_lock);
965 out2:
966         rt->dst.lastuse = jiffies;
967         rt->dst.__use++;
968
969         return rt;
970 }
971
972 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
973                                             struct flowi6 *fl6, int flags)
974 {
975         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
976 }
977
978 static struct dst_entry *ip6_route_input_lookup(struct net *net,
979                                                 struct net_device *dev,
980                                                 struct flowi6 *fl6, int flags)
981 {
982         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
983                 flags |= RT6_LOOKUP_F_IFACE;
984
985         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
986 }
987
988 void ip6_route_input(struct sk_buff *skb)
989 {
990         const struct ipv6hdr *iph = ipv6_hdr(skb);
991         struct net *net = dev_net(skb->dev);
992         int flags = RT6_LOOKUP_F_HAS_SADDR;
993         struct flowi6 fl6 = {
994                 .flowi6_iif = skb->dev->ifindex,
995                 .daddr = iph->daddr,
996                 .saddr = iph->saddr,
997                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
998                 .flowi6_mark = skb->mark,
999                 .flowi6_proto = iph->nexthdr,
1000         };
1001
1002         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1003 }
1004
1005 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1006                                              struct flowi6 *fl6, int flags)
1007 {
1008         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1009 }
1010
1011 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1012                                     struct flowi6 *fl6)
1013 {
1014         int flags = 0;
1015
1016         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1017
1018         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1019                 flags |= RT6_LOOKUP_F_IFACE;
1020
1021         if (!ipv6_addr_any(&fl6->saddr))
1022                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1023         else if (sk)
1024                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1025
1026         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1027 }
1028
1029 EXPORT_SYMBOL(ip6_route_output);
1030
1031 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1032 {
1033         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1034         struct dst_entry *new = NULL;
1035
1036         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1037         if (rt) {
1038                 new = &rt->dst;
1039
1040                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1041                 rt6_init_peer(rt, net->ipv6.peers);
1042
1043                 new->__use = 1;
1044                 new->input = dst_discard;
1045                 new->output = dst_discard;
1046
1047                 if (dst_metrics_read_only(&ort->dst))
1048                         new->_metrics = ort->dst._metrics;
1049                 else
1050                         dst_copy_metrics(new, &ort->dst);
1051                 rt->rt6i_idev = ort->rt6i_idev;
1052                 if (rt->rt6i_idev)
1053                         in6_dev_hold(rt->rt6i_idev);
1054
1055                 rt->rt6i_gateway = ort->rt6i_gateway;
1056                 rt->rt6i_flags = ort->rt6i_flags;
1057                 rt6_clean_expires(rt);
1058                 rt->rt6i_metric = 0;
1059
1060                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1061 #ifdef CONFIG_IPV6_SUBTREES
1062                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1063 #endif
1064
1065                 dst_free(new);
1066         }
1067
1068         dst_release(dst_orig);
1069         return new ? new : ERR_PTR(-ENOMEM);
1070 }
1071
1072 /*
1073  *      Destination cache support functions
1074  */
1075
1076 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1077 {
1078         struct rt6_info *rt;
1079
1080         rt = (struct rt6_info *) dst;
1081
1082         /* All IPV6 dsts are created with ->obsolete set to the value
1083          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1084          * into this function always.
1085          */
1086         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1087                 return NULL;
1088
1089         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1090                 return dst;
1091
1092         return NULL;
1093 }
1094
1095 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1096 {
1097         struct rt6_info *rt = (struct rt6_info *) dst;
1098
1099         if (rt) {
1100                 if (rt->rt6i_flags & RTF_CACHE) {
1101                         if (rt6_check_expired(rt)) {
1102                                 ip6_del_rt(rt);
1103                                 dst = NULL;
1104                         }
1105                 } else {
1106                         dst_release(dst);
1107                         dst = NULL;
1108                 }
1109         }
1110         return dst;
1111 }
1112
1113 static void ip6_link_failure(struct sk_buff *skb)
1114 {
1115         struct rt6_info *rt;
1116
1117         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1118
1119         rt = (struct rt6_info *) skb_dst(skb);
1120         if (rt) {
1121                 if (rt->rt6i_flags & RTF_CACHE)
1122                         rt6_update_expires(rt, 0);
1123                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1124                         rt->rt6i_node->fn_sernum = -1;
1125         }
1126 }
1127
1128 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1129                                struct sk_buff *skb, u32 mtu)
1130 {
1131         struct rt6_info *rt6 = (struct rt6_info*)dst;
1132
1133         dst_confirm(dst);
1134         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1135                 struct net *net = dev_net(dst->dev);
1136
1137                 rt6->rt6i_flags |= RTF_MODIFIED;
1138                 if (mtu < IPV6_MIN_MTU) {
1139                         u32 features = dst_metric(dst, RTAX_FEATURES);
1140                         mtu = IPV6_MIN_MTU;
1141                         features |= RTAX_FEATURE_ALLFRAG;
1142                         dst_metric_set(dst, RTAX_FEATURES, features);
1143                 }
1144                 dst_metric_set(dst, RTAX_MTU, mtu);
1145                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1146         }
1147 }
1148
1149 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1150                      int oif, u32 mark)
1151 {
1152         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1153         struct dst_entry *dst;
1154         struct flowi6 fl6;
1155
1156         memset(&fl6, 0, sizeof(fl6));
1157         fl6.flowi6_oif = oif;
1158         fl6.flowi6_mark = mark;
1159         fl6.flowi6_flags = 0;
1160         fl6.daddr = iph->daddr;
1161         fl6.saddr = iph->saddr;
1162         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1163
1164         dst = ip6_route_output(net, NULL, &fl6);
1165         if (!dst->error)
1166                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1167         dst_release(dst);
1168 }
1169 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1170
1171 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1172 {
1173         ip6_update_pmtu(skb, sock_net(sk), mtu,
1174                         sk->sk_bound_dev_if, sk->sk_mark);
1175 }
1176 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1177
1178 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1179 {
1180         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1181         struct dst_entry *dst;
1182         struct flowi6 fl6;
1183
1184         memset(&fl6, 0, sizeof(fl6));
1185         fl6.flowi6_oif = oif;
1186         fl6.flowi6_mark = mark;
1187         fl6.flowi6_flags = 0;
1188         fl6.daddr = iph->daddr;
1189         fl6.saddr = iph->saddr;
1190         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1191
1192         dst = ip6_route_output(net, NULL, &fl6);
1193         if (!dst->error)
1194                 rt6_do_redirect(dst, NULL, skb);
1195         dst_release(dst);
1196 }
1197 EXPORT_SYMBOL_GPL(ip6_redirect);
1198
1199 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1200 {
1201         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1202 }
1203 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1204
1205 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1206 {
1207         struct net_device *dev = dst->dev;
1208         unsigned int mtu = dst_mtu(dst);
1209         struct net *net = dev_net(dev);
1210
1211         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1212
1213         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1214                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1215
1216         /*
1217          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1218          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1219          * IPV6_MAXPLEN is also valid and means: "any MSS,
1220          * rely only on pmtu discovery"
1221          */
1222         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1223                 mtu = IPV6_MAXPLEN;
1224         return mtu;
1225 }
1226
1227 static unsigned int ip6_mtu(const struct dst_entry *dst)
1228 {
1229         struct inet6_dev *idev;
1230         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1231
1232         if (mtu)
1233                 return mtu;
1234
1235         mtu = IPV6_MIN_MTU;
1236
1237         rcu_read_lock();
1238         idev = __in6_dev_get(dst->dev);
1239         if (idev)
1240                 mtu = idev->cnf.mtu6;
1241         rcu_read_unlock();
1242
1243         return mtu;
1244 }
1245
/*
 * Singly linked list, chained through dst->next, of the dsts created by
 * icmp6_dst_alloc().  It is pushed to by icmp6_dst_alloc() and walked by
 * icmp6_dst_gc() and icmp6_clean_all(), always under icmp6_dst_lock.
 */
1246 static struct dst_entry *icmp6_dst_gc_list;
1247 static DEFINE_SPINLOCK(icmp6_dst_lock);
1248
1249 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1250                                   struct neighbour *neigh,
1251                                   struct flowi6 *fl6)
1252 {
1253         struct dst_entry *dst;
1254         struct rt6_info *rt;
1255         struct inet6_dev *idev = in6_dev_get(dev);
1256         struct net *net = dev_net(dev);
1257
1258         if (unlikely(!idev))
1259                 return ERR_PTR(-ENODEV);
1260
1261         rt = ip6_dst_alloc(net, dev, 0, NULL);
1262         if (unlikely(!rt)) {
1263                 in6_dev_put(idev);
1264                 dst = ERR_PTR(-ENOMEM);
1265                 goto out;
1266         }
1267
1268         if (neigh)
1269                 neigh_hold(neigh);
1270         else {
1271                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1272                 if (IS_ERR(neigh)) {
1273                         in6_dev_put(idev);
1274                         dst_free(&rt->dst);
1275                         return ERR_CAST(neigh);
1276                 }
1277         }
1278
1279         rt->dst.flags |= DST_HOST;
1280         rt->dst.output  = ip6_output;
1281         rt->n = neigh;
1282         atomic_set(&rt->dst.__refcnt, 1);
1283         rt->rt6i_dst.addr = fl6->daddr;
1284         rt->rt6i_dst.plen = 128;
1285         rt->rt6i_idev     = idev;
1286         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1287
1288         spin_lock_bh(&icmp6_dst_lock);
1289         rt->dst.next = icmp6_dst_gc_list;
1290         icmp6_dst_gc_list = &rt->dst;
1291         spin_unlock_bh(&icmp6_dst_lock);
1292
1293         fib6_force_start_gc(net);
1294
1295         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1296
1297 out:
1298         return dst;
1299 }
1300
1301 int icmp6_dst_gc(void)
1302 {
1303         struct dst_entry *dst, **pprev;
1304         int more = 0;
1305
1306         spin_lock_bh(&icmp6_dst_lock);
1307         pprev = &icmp6_dst_gc_list;
1308
1309         while ((dst = *pprev) != NULL) {
1310                 if (!atomic_read(&dst->__refcnt)) {
1311                         *pprev = dst->next;
1312                         dst_free(dst);
1313                 } else {
1314                         pprev = &dst->next;
1315                         ++more;
1316                 }
1317         }
1318
1319         spin_unlock_bh(&icmp6_dst_lock);
1320
1321         return more;
1322 }
1323
1324 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1325                             void *arg)
1326 {
1327         struct dst_entry *dst, **pprev;
1328
1329         spin_lock_bh(&icmp6_dst_lock);
1330         pprev = &icmp6_dst_gc_list;
1331         while ((dst = *pprev) != NULL) {
1332                 struct rt6_info *rt = (struct rt6_info *) dst;
1333                 if (func(rt, arg)) {
1334                         *pprev = dst->next;
1335                         dst_free(dst);
1336                 } else {
1337                         pprev = &dst->next;
1338                 }
1339         }
1340         spin_unlock_bh(&icmp6_dst_lock);
1341 }
1342
1343 static int ip6_dst_gc(struct dst_ops *ops)
1344 {
1345         unsigned long now = jiffies;
1346         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1347         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1348         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1349         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1350         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1351         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1352         int entries;
1353
1354         entries = dst_entries_get_fast(ops);
1355         if (time_after(rt_last_gc + rt_min_interval, now) &&
1356             entries <= rt_max_size)
1357                 goto out;
1358
1359         net->ipv6.ip6_rt_gc_expire++;
1360         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1361         net->ipv6.ip6_rt_last_gc = now;
1362         entries = dst_entries_get_slow(ops);
1363         if (entries < ops->gc_thresh)
1364                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1365 out:
1366         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1367         return entries > rt_max_size;
1368 }
1369
1370 int ip6_dst_hoplimit(struct dst_entry *dst)
1371 {
1372         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1373         if (hoplimit == 0) {
1374                 struct net_device *dev = dst->dev;
1375                 struct inet6_dev *idev;
1376
1377                 rcu_read_lock();
1378                 idev = __in6_dev_get(dev);
1379                 if (idev)
1380                         hoplimit = idev->cnf.hop_limit;
1381                 else
1382                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1383                 rcu_read_unlock();
1384         }
1385         return hoplimit;
1386 }
1387 EXPORT_SYMBOL(ip6_dst_hoplimit);
1388
1389 /*
1390  *
1391  */
1392
/*
 * Build a route from the parameters in @cfg and insert it into the FIB.
 *
 * Returns the result of __ip6_ins_rt() once the route has been fully
 * built, or a negative errno when validation or allocation fails before
 * that.  On those early failures the device, inet6_dev and route
 * references acquired so far are released at 'out'.
 *
 * Note that @cfg is modified: fc_metric and fc_protocol receive defaults
 * when unset, and fc_nlinfo.nl_net is overwritten with the namespace of
 * the device finally chosen for the route.
 *
 * Throughout the function 'err' is pre-loaded before a group of checks
 * so that a bare 'goto out' reports the intended error code.
 */
1393 int ip6_route_add(struct fib6_config *cfg)
1394 {
1395         int err;
1396         struct net *net = cfg->fc_nlinfo.nl_net;
1397         struct rt6_info *rt = NULL;
1398         struct net_device *dev = NULL;
1399         struct inet6_dev *idev = NULL;
1400         struct fib6_table *table;
1401         int addr_type;
1402
1403         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1404                 return -EINVAL;
             /* Source-prefix routes are only accepted with subtree support. */
1405 #ifndef CONFIG_IPV6_SUBTREES
1406         if (cfg->fc_src_len)
1407                 return -EINVAL;
1408 #endif
             /* An explicit interface index must name a device with an inet6_dev;
              * references on both are taken here.
              */
1409         if (cfg->fc_ifindex) {
1410                 err = -ENODEV;
1411                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1412                 if (!dev)
1413                         goto out;
1414                 idev = in6_dev_get(dev);
1415                 if (!idev)
1416                         goto out;
1417         }
1418
1419         if (cfg->fc_metric == 0)
1420                 cfg->fc_metric = IP6_RT_PRIO_USER;
1421
             /* For a netlink request without NLM_F_CREATE an existing table is
              * looked up first; if there is none, a warning is logged and the
              * table is created anyway.
              */
1422         err = -ENOBUFS;
1423         if (cfg->fc_nlinfo.nlh &&
1424             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1425                 table = fib6_get_table(net, cfg->fc_table);
1426                 if (!table) {
1427                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1428                         table = fib6_new_table(net, cfg->fc_table);
1429                 }
1430         } else {
1431                 table = fib6_new_table(net, cfg->fc_table);
1432         }
1433
1434         if (!table)
1435                 goto out;
1436
1437         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1438
1439         if (!rt) {
1440                 err = -ENOMEM;
1441                 goto out;
1442         }
1443
1444         if (cfg->fc_flags & RTF_EXPIRES)
1445                 rt6_set_expires(rt, jiffies +
1446                                 clock_t_to_jiffies(cfg->fc_expires));
1447         else
1448                 rt6_clean_expires(rt);
1449
1450         if (cfg->fc_protocol == RTPROT_UNSPEC)
1451                 cfg->fc_protocol = RTPROT_BOOT;
1452         rt->rt6i_protocol = cfg->fc_protocol;
1453
1454         addr_type = ipv6_addr_type(&cfg->fc_dst);
1455
             /* Pick the input handler from the destination type and flags. */
1456         if (addr_type & IPV6_ADDR_MULTICAST)
1457                 rt->dst.input = ip6_mc_input;
1458         else if (cfg->fc_flags & RTF_LOCAL)
1459                 rt->dst.input = ip6_input;
1460         else
1461                 rt->dst.input = ip6_forward;
1462
1463         rt->dst.output = ip6_output;
1464
1465         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1466         rt->rt6i_dst.plen = cfg->fc_dst_len;
1467         if (rt->rt6i_dst.plen == 128)
1468                rt->dst.flags |= DST_HOST;
1469
             /* A non-host route that carries metrics gets its own zeroed
              * metrics array, handed over to dst_init_metrics().
              */
1470         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1471                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1472                 if (!metrics) {
1473                         err = -ENOMEM;
1474                         goto out;
1475                 }
1476                 dst_init_metrics(&rt->dst, metrics, 0);
1477         }
1478 #ifdef CONFIG_IPV6_SUBTREES
1479         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1480         rt->rt6i_src.plen = cfg->fc_src_len;
1481 #endif
1482
1483         rt->rt6i_metric = cfg->fc_metric;
1484
1485         /* We cannot add true routes via loopback here,
1486            they would result in kernel looping; promote them to reject routes
1487          */
1488         if ((cfg->fc_flags & RTF_REJECT) ||
1489             (dev && (dev->flags & IFF_LOOPBACK) &&
1490              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1491              !(cfg->fc_flags & RTF_LOCAL))) {
1492                 /* hold loopback dev/idev if we haven't done so. */
1493                 if (dev != net->loopback_dev) {
1494                         if (dev) {
1495                                 dev_put(dev);
1496                                 in6_dev_put(idev);
1497                         }
1498                         dev = net->loopback_dev;
1499                         dev_hold(dev);
1500                         idev = in6_dev_get(dev);
1501                         if (!idev) {
1502                                 err = -ENODEV;
1503                                 goto out;
1504                         }
1505                 }
1506                 rt->dst.output = ip6_pkt_discard_out;
1507                 rt->dst.input = ip6_pkt_discard;
1508                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error stored in the dst. */
1509                 switch (cfg->fc_type) {
1510                 case RTN_BLACKHOLE:
1511                         rt->dst.error = -EINVAL;
1512                         break;
1513                 case RTN_PROHIBIT:
1514                         rt->dst.error = -EACCES;
1515                         break;
1516                 case RTN_THROW:
1517                         rt->dst.error = -EAGAIN;
1518                         break;
1519                 default:
1520                         rt->dst.error = -ENETUNREACH;
1521                         break;
1522                 }
1523                 goto install_route;
1524         }
1525
1526         if (cfg->fc_flags & RTF_GATEWAY) {
1527                 const struct in6_addr *gw_addr;
1528                 int gwa_type;
1529
1530                 gw_addr = &cfg->fc_gateway;
1531                 rt->rt6i_gateway = *gw_addr;
1532                 gwa_type = ipv6_addr_type(gw_addr);
1533
1534                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1535                         struct rt6_info *grt;
1536
1537                         /* IPv6 strictly inhibits using not link-local
1538                            addresses as nexthop address.
1539                            Otherwise, router will not able to send redirects.
1540                            It is very good, but in some (rare!) circumstances
1541                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1542                            some exceptions. --ANK
1543                          */
1544                         err = -EINVAL;
1545                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1546                                 goto out;
1547
1548                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1549
1550                         err = -EHOSTUNREACH;
1551                         if (!grt)
1552                                 goto out;
                             /* The gateway must be reached through the requested
                              * device; with no device requested, adopt the one
                              * (and its inet6_dev) from the gateway's route.
                              */
1553                         if (dev) {
1554                                 if (dev != grt->dst.dev) {
1555                                         ip6_rt_put(grt);
1556                                         goto out;
1557                                 }
1558                         } else {
1559                                 dev = grt->dst.dev;
1560                                 idev = grt->rt6i_idev;
1561                                 dev_hold(dev);
1562                                 in6_dev_hold(grt->rt6i_idev);
1563                         }
                             /* err stays -EHOSTUNREACH when the route to the
                              * gateway is itself a gateway route.
                              */
1564                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1565                                 err = 0;
1566                         ip6_rt_put(grt);
1567
1568                         if (err)
1569                                 goto out;
1570                 }
1571                 err = -EINVAL;
1572                 if (!dev || (dev->flags & IFF_LOOPBACK))
1573                         goto out;
1574         }
1575
1576         err = -ENODEV;
1577         if (!dev)
1578                 goto out;
1579
             /* A preferred source address is accepted only if ipv6_chk_addr()
              * succeeds for it on dev.
              */
1580         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1581                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1582                         err = -EINVAL;
1583                         goto out;
1584                 }
1585                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1586                 rt->rt6i_prefsrc.plen = 128;
1587         } else
1588                 rt->rt6i_prefsrc.plen = 0;
1589
1590         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1591                 err = rt6_bind_neighbour(rt, dev);
1592                 if (err)
1593                         goto out;
1594         }
1595
1596         rt->rt6i_flags = cfg->fc_flags;
1597
1598 install_route:
             /* Copy the supplied metric attributes into the dst.  Attribute
              * type 0 is skipped and types above RTAX_MAX are rejected.
              */
1599         if (cfg->fc_mx) {
1600                 struct nlattr *nla;
1601                 int remaining;
1602
1603                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1604                         int type = nla_type(nla);
1605
1606                         if (type) {
1607                                 if (type > RTAX_MAX) {
1608                                         err = -EINVAL;
1609                                         goto out;
1610                                 }
1611
1612                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1613                         }
1614                 }
1615         }
1616
             /* The dev and idev references held so far are not dropped on this
              * path; the pointers are stored in the route.
              */
1617         rt->dst.dev = dev;
1618         rt->rt6i_idev = idev;
1619         rt->rt6i_table = table;
1620
1621         cfg->fc_nlinfo.nl_net = dev_net(dev);
1622
1623         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1624
1625 out:
             /* Error exit: release whatever was acquired before the failure. */
1626         if (dev)
1627                 dev_put(dev);
1628         if (idev)
1629                 in6_dev_put(idev);
1630         if (rt)
1631                 dst_free(&rt->dst);
1632         return err;
1633 }
1634
1635 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1636 {
1637         int err;
1638         struct fib6_table *table;
1639         struct net *net = dev_net(rt->dst.dev);
1640
1641         if (rt == net->ipv6.ip6_null_entry) {
1642                 err = -ENOENT;
1643                 goto out;
1644         }
1645
1646         table = rt->rt6i_table;
1647         write_lock_bh(&table->tb6_lock);
1648         err = fib6_del(rt, info);
1649         write_unlock_bh(&table->tb6_lock);
1650
1651 out:
1652         ip6_rt_put(rt);
1653         return err;
1654 }
1655
1656 int ip6_del_rt(struct rt6_info *rt)
1657 {
1658         struct nl_info info = {
1659                 .nl_net = dev_net(rt->dst.dev),
1660         };
1661         return __ip6_del_rt(rt, &info);
1662 }
1663
1664 static int ip6_route_del(struct fib6_config *cfg)
1665 {
1666         struct fib6_table *table;
1667         struct fib6_node *fn;
1668         struct rt6_info *rt;
1669         int err = -ESRCH;
1670
1671         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1672         if (!table)
1673                 return err;
1674
1675         read_lock_bh(&table->tb6_lock);
1676
1677         fn = fib6_locate(&table->tb6_root,
1678                          &cfg->fc_dst, cfg->fc_dst_len,
1679                          &cfg->fc_src, cfg->fc_src_len);
1680
1681         if (fn) {
1682                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1683                         if (cfg->fc_ifindex &&
1684                             (!rt->dst.dev ||
1685                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1686                                 continue;
1687                         if (cfg->fc_flags & RTF_GATEWAY &&
1688                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1689                                 continue;
1690                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1691                                 continue;
1692                         dst_hold(&rt->dst);
1693                         read_unlock_bh(&table->tb6_lock);
1694
1695                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1696                 }
1697         }
1698         read_unlock_bh(&table->tb6_lock);
1699
1700         return err;
1701 }
1702
/*
 * rt6_do_redirect - act on a received ICMPv6 Redirect for the route @dst.
 *
 * @dst: route the redirect applies to; used here as a struct rt6_info.
 * @sk:  not referenced anywhere in this function.
 * @skb: received packet; the ICMPv6 header is followed by the target
 *       address, then the destination address, then ND options.
 *
 * The message is ignored (most paths emit a rate-limited debug log) when
 * it is too short, the destination is multicast, the target is neither
 * equal to the destination nor a link-local unicast address, the receiving
 * device has no inet6_dev, has forwarding enabled or accept_redirects
 * disabled, the ND options or the target link-layer address do not parse,
 * or @dst is the namespace's null entry.
 *
 * Otherwise the neighbour entry for the target is updated, a copy of the
 * route is made for the destination (flagged RTF_DYNAMIC|RTF_CACHE, and
 * RTF_GATEWAY unless on-link) and inserted, NETEVENT_REDIRECT notifiers are
 * called, and the old route is deleted if it was itself an RTF_CACHE entry.
 */
1703 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1704 {
1705         struct net *net = dev_net(skb->dev);
1706         struct netevent_redirect netevent;
1707         struct rt6_info *rt, *nrt = NULL;
1708         const struct in6_addr *target;
1709         struct ndisc_options ndopts;
1710         const struct in6_addr *dest;
1711         struct neighbour *old_neigh;
1712         struct inet6_dev *in6_dev;
1713         struct neighbour *neigh;
1714         struct icmp6hdr *icmph;
1715         int optlen, on_link;
1716         u8 *lladdr;
1717
             /* What remains after the ICMPv6 header and the two addresses
              * is the length handed to the ND option parser below.
              */
1718         optlen = skb->tail - skb->transport_header;
1719         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1720
1721         if (optlen < 0) {
1722                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1723                 return;
1724         }
1725
             /* target sits right after the ICMPv6 header, dest right after it */
1726         icmph = icmp6_hdr(skb);
1727         target = (const struct in6_addr *) (icmph + 1);
1728         dest = target + 1;
1729
1730         if (ipv6_addr_is_multicast(dest)) {
1731                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1732                 return;
1733         }
1734
             /* target == dest is treated as an on-link redirect; any other
              * target must be a link-local unicast address.
              */
1735         on_link = 0;
1736         if (ipv6_addr_equal(dest, target)) {
1737                 on_link = 1;
1738         } else if (ipv6_addr_type(target) !=
1739                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1740                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1741                 return;
1742         }
1743
1744         in6_dev = __in6_dev_get(skb->dev);
1745         if (!in6_dev)
1746                 return;
1747         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1748                 return;
1749
1750         /* RFC2461 8.1:
1751          *      The IP source address of the Redirect MUST be the same as the current
1752          *      first-hop router for the specified ICMP Destination Address.
1753          */
             /* NOTE(review): no source-address comparison is visible in this
              * function; presumably the caller selects @dst accordingly, and
              * the null-entry test below rejects a failed lookup. Confirm.
              */
1754
1755         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1756                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1757                 return;
1758         }
1759
             /* The target link-layer address option is optional; lladdr stays
              * NULL when it is absent.
              */
1760         lladdr = NULL;
1761         if (ndopts.nd_opts_tgt_lladdr) {
1762                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1763                                              skb->dev);
1764                 if (!lladdr) {
1765                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1766                         return;
1767                 }
1768         }
1769
1770         rt = (struct rt6_info *) dst;
1771         if (rt == net->ipv6.ip6_null_entry) {
1772                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1773                 return;
1774         }
1775
1776         /* Redirect received -> path was valid.
1777          * Look, redirects are sent only in response to data packets,
1778          * so that this nexthop apparently is reachable. --ANK
1779          */
1780         dst_confirm(&rt->dst);
1781
             /* NOTE(review): the final argument (1) presumably requests that
              * the entry be created when missing - confirm against
              * __neigh_lookup().
              */
1782         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1783         if (!neigh)
1784                 return;
1785
1786         /* Duplicate redirect: silently ignore. */
1787         old_neigh = rt->n;
1788         if (neigh == old_neigh)
1789                 goto out;
1790
1791         /*
1792          *      We have finally decided to accept it.
1793          */
1794
             /* The router flags are only passed for off-link targets. */
1795         neigh_update(neigh, lladdr, NUD_STALE,
1796                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1797                      NEIGH_UPDATE_F_OVERRIDE|
1798                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1799                                      NEIGH_UPDATE_F_ISROUTER))
1800                      );
1801
1802         nrt = ip6_rt_copy(rt, dest);
1803         if (!nrt)
1804                 goto out;
1805
             /* Flags copied from rt are replaced wholesale here. */
1806         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1807         if (on_link)
1808                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1809
1810         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1811         nrt->n = neigh_clone(neigh);
1812
1813         if (ip6_ins_rt(nrt))
1814                 goto out;
1815
1816         netevent.old = &rt->dst;
1817         netevent.old_neigh = old_neigh;
1818         netevent.new = &nrt->dst;
1819         netevent.new_neigh = neigh;
1820         netevent.daddr = dest;
1821         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1822
             /* Only a cached (RTF_CACHE) old route is removed.
              * NOTE(review): dst_clone() presumably takes the reference that
              * ip6_del_rt() consumes - confirm.
              */
1823         if (rt->rt6i_flags & RTF_CACHE) {
1824                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1825                 ip6_del_rt(rt);
1826         }
1827
1828 out:
             /* Release the neighbour looked up above. */
1829         neigh_release(neigh);
1830 }
1831
1832 /*
1833  *      Misc support functions
1834  */
1835
1836 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1837                                     const struct in6_addr *dest)
1838 {
1839         struct net *net = dev_net(ort->dst.dev);
1840         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1841                                             ort->rt6i_table);
1842
1843         if (rt) {
1844                 rt->dst.input = ort->dst.input;
1845                 rt->dst.output = ort->dst.output;
1846                 rt->dst.flags |= DST_HOST;
1847
1848                 rt->rt6i_dst.addr = *dest;
1849                 rt->rt6i_dst.plen = 128;
1850                 dst_copy_metrics(&rt->dst, &ort->dst);
1851                 rt->dst.error = ort->dst.error;
1852                 rt->rt6i_idev = ort->rt6i_idev;
1853                 if (rt->rt6i_idev)
1854                         in6_dev_hold(rt->rt6i_idev);
1855                 rt->dst.lastuse = jiffies;
1856
1857                 rt->rt6i_gateway = ort->rt6i_gateway;
1858                 rt->rt6i_flags = ort->rt6i_flags;
1859                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1860                     (RTF_DEFAULT | RTF_ADDRCONF))
1861                         rt6_set_from(rt, ort);
1862                 else
1863                         rt6_clean_expires(rt);
1864                 rt->rt6i_metric = 0;
1865
1866 #ifdef CONFIG_IPV6_SUBTREES
1867                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1868 #endif
1869                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1870                 rt->rt6i_table = ort->rt6i_table;
1871         }
1872         return rt;
1873 }
1874
1875 #ifdef CONFIG_IPV6_ROUTE_INFO
1876 static struct rt6_info *rt6_get_route_info(struct net *net,
1877                                            const struct in6_addr *prefix, int prefixlen,
1878                                            const struct in6_addr *gwaddr, int ifindex)
1879 {
1880         struct fib6_node *fn;
1881         struct rt6_info *rt = NULL;
1882         struct fib6_table *table;
1883
1884         table = fib6_get_table(net, RT6_TABLE_INFO);
1885         if (!table)
1886                 return NULL;
1887
1888         read_lock_bh(&table->tb6_lock);
1889         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1890         if (!fn)
1891                 goto out;
1892
1893         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1894                 if (rt->dst.dev->ifindex != ifindex)
1895                         continue;
1896                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1897                         continue;
1898                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1899                         continue;
1900                 dst_hold(&rt->dst);
1901                 break;
1902         }
1903 out:
1904         read_unlock_bh(&table->tb6_lock);
1905         return rt;
1906 }
1907
1908 static struct rt6_info *rt6_add_route_info(struct net *net,
1909                                            const struct in6_addr *prefix, int prefixlen,
1910                                            const struct in6_addr *gwaddr, int ifindex,
1911                                            unsigned int pref)
1912 {
1913         struct fib6_config cfg = {
1914                 .fc_table       = RT6_TABLE_INFO,
1915                 .fc_metric      = IP6_RT_PRIO_USER,
1916                 .fc_ifindex     = ifindex,
1917                 .fc_dst_len     = prefixlen,
1918                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1919                                   RTF_UP | RTF_PREF(pref),
1920                 .fc_nlinfo.portid = 0,
1921                 .fc_nlinfo.nlh = NULL,
1922                 .fc_nlinfo.nl_net = net,
1923         };
1924
1925         cfg.fc_dst = *prefix;
1926         cfg.fc_gateway = *gwaddr;
1927
1928         /* We should treat it as a default route if prefix length is 0. */
1929         if (!prefixlen)
1930                 cfg.fc_flags |= RTF_DEFAULT;
1931
1932         ip6_route_add(&cfg);
1933
1934         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1935 }
1936 #endif
1937
1938 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1939 {
1940         struct rt6_info *rt;
1941         struct fib6_table *table;
1942
1943         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1944         if (!table)
1945                 return NULL;
1946
1947         read_lock_bh(&table->tb6_lock);
1948         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1949                 if (dev == rt->dst.dev &&
1950                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1951                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1952                         break;
1953         }
1954         if (rt)
1955                 dst_hold(&rt->dst);
1956         read_unlock_bh(&table->tb6_lock);
1957         return rt;
1958 }
1959
1960 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1961                                      struct net_device *dev,
1962                                      unsigned int pref)
1963 {
1964         struct fib6_config cfg = {
1965                 .fc_table       = RT6_TABLE_DFLT,
1966                 .fc_metric      = IP6_RT_PRIO_USER,
1967                 .fc_ifindex     = dev->ifindex,
1968                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1969                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1970                 .fc_nlinfo.portid = 0,
1971                 .fc_nlinfo.nlh = NULL,
1972                 .fc_nlinfo.nl_net = dev_net(dev),
1973         };
1974
1975         cfg.fc_gateway = *gwaddr;
1976
1977         ip6_route_add(&cfg);
1978
1979         return rt6_get_dflt_router(gwaddr, dev);
1980 }
1981
1982 void rt6_purge_dflt_routers(struct net *net)
1983 {
1984         struct rt6_info *rt;
1985         struct fib6_table *table;
1986
1987         /* NOTE: Keep consistent with rt6_get_dflt_router */
1988         table = fib6_get_table(net, RT6_TABLE_DFLT);
1989         if (!table)
1990                 return;
1991
1992 restart:
1993         read_lock_bh(&table->tb6_lock);
1994         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1995                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1996                         dst_hold(&rt->dst);
1997                         read_unlock_bh(&table->tb6_lock);
1998                         ip6_del_rt(rt);
1999                         goto restart;
2000                 }
2001         }
2002         read_unlock_bh(&table->tb6_lock);
2003 }
2004
2005 static void rtmsg_to_fib6_config(struct net *net,
2006                                  struct in6_rtmsg *rtmsg,
2007                                  struct fib6_config *cfg)
2008 {
2009         memset(cfg, 0, sizeof(*cfg));
2010
2011         cfg->fc_table = RT6_TABLE_MAIN;
2012         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2013         cfg->fc_metric = rtmsg->rtmsg_metric;
2014         cfg->fc_expires = rtmsg->rtmsg_info;
2015         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2016         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2017         cfg->fc_flags = rtmsg->rtmsg_flags;
2018
2019         cfg->fc_nlinfo.nl_net = net;
2020
2021         cfg->fc_dst = rtmsg->rtmsg_dst;
2022         cfg->fc_src = rtmsg->rtmsg_src;
2023         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2024 }
2025
2026 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2027 {
2028         struct fib6_config cfg;
2029         struct in6_rtmsg rtmsg;
2030         int err;
2031
2032         switch(cmd) {
2033         case SIOCADDRT:         /* Add a route */
2034         case SIOCDELRT:         /* Delete a route */
2035                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2036                         return -EPERM;
2037                 err = copy_from_user(&rtmsg, arg,
2038                                      sizeof(struct in6_rtmsg));
2039                 if (err)
2040                         return -EFAULT;
2041
2042                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2043
2044                 rtnl_lock();
2045                 switch (cmd) {
2046                 case SIOCADDRT:
2047                         err = ip6_route_add(&cfg);
2048                         break;
2049                 case SIOCDELRT:
2050                         err = ip6_route_del(&cfg);
2051                         break;
2052                 default:
2053                         err = -EINVAL;
2054                 }
2055                 rtnl_unlock();
2056
2057                 return err;
2058         }
2059
2060         return -EINVAL;
2061 }
2062
2063 /*
2064  *      Drop the packet on the floor
2065  */
2066
2067 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2068 {
2069         int type;
2070         struct dst_entry *dst = skb_dst(skb);
2071         switch (ipstats_mib_noroutes) {
2072         case IPSTATS_MIB_INNOROUTES:
2073                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2074                 if (type == IPV6_ADDR_ANY) {
2075                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2076                                       IPSTATS_MIB_INADDRERRORS);
2077                         break;
2078                 }
2079                 /* FALLTHROUGH */
2080         case IPSTATS_MIB_OUTNOROUTES:
2081                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2082                               ipstats_mib_noroutes);
2083                 break;
2084         }
2085         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2086         kfree_skb(skb);
2087         return 0;
2088 }
2089
2090 static int ip6_pkt_discard(struct sk_buff *skb)
2091 {
2092         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2093 }
2094
2095 static int ip6_pkt_discard_out(struct sk_buff *skb)
2096 {
2097         skb->dev = skb_dst(skb)->dev;
2098         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2099 }
2100
2101 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2102
2103 static int ip6_pkt_prohibit(struct sk_buff *skb)
2104 {
2105         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2106 }
2107
2108 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2109 {
2110         skb->dev = skb_dst(skb)->dev;
2111         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2112 }
2113
2114 #endif
2115
2116 /*
2117  *      Allocate a dst for local (unicast / anycast) address.
2118  */
2119
2120 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2121                                     const struct in6_addr *addr,
2122                                     bool anycast)
2123 {
2124         struct net *net = dev_net(idev->dev);
2125         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2126         int err;
2127
2128         if (!rt) {
2129                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2130                 return ERR_PTR(-ENOMEM);
2131         }
2132
2133         in6_dev_hold(idev);
2134
2135         rt->dst.flags |= DST_HOST;
2136         rt->dst.input = ip6_input;
2137         rt->dst.output = ip6_output;
2138         rt->rt6i_idev = idev;
2139
2140         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2141         if (anycast)
2142                 rt->rt6i_flags |= RTF_ANYCAST;
2143         else
2144                 rt->rt6i_flags |= RTF_LOCAL;
2145         err = rt6_bind_neighbour(rt, rt->dst.dev);
2146         if (err) {
2147                 dst_free(&rt->dst);
2148                 return ERR_PTR(err);
2149         }
2150
2151         rt->rt6i_dst.addr = *addr;
2152         rt->rt6i_dst.plen = 128;
2153         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2154
2155         atomic_set(&rt->dst.__refcnt, 1);
2156
2157         return rt;
2158 }
2159
2160 int ip6_route_get_saddr(struct net *net,
2161                         struct rt6_info *rt,
2162                         const struct in6_addr *daddr,
2163                         unsigned int prefs,
2164                         struct in6_addr *saddr)
2165 {
2166         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2167         int err = 0;
2168         if (rt->rt6i_prefsrc.plen)
2169                 *saddr = rt->rt6i_prefsrc.addr;
2170         else
2171                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2172                                          daddr, prefs, saddr);
2173         return err;
2174 }
2175
2176 /* remove deleted ip from prefsrc entries */
/* Argument bundle that rt6_remove_prefsrc() passes to fib6_remove_prefsrc(). */
2177 struct arg_dev_net_ip {
2178         struct net_device *dev;   /* device to match; NULL matches any */
2179         struct net *net;          /* namespace whose null entry is skipped */
2180         struct in6_addr *addr;    /* preferred-source address being removed */
2181 };
2182
2183 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2184 {
2185         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2186         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2187         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2188
2189         if (((void *)rt->dst.dev == dev || !dev) &&
2190             rt != net->ipv6.ip6_null_entry &&
2191             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2192                 /* remove prefsrc entry */
2193                 rt->rt6i_prefsrc.plen = 0;
2194         }
2195         return 0;
2196 }
2197
2198 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2199 {
2200         struct net *net = dev_net(ifp->idev->dev);
2201         struct arg_dev_net_ip adni = {
2202                 .dev = ifp->idev->dev,
2203                 .net = net,
2204                 .addr = &ifp->addr,
2205         };
2206         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2207 }
2208
/* Argument bundle that rt6_ifdown() passes to fib6_ifdown(). */
2209 struct arg_dev_net {
2210         struct net_device *dev;   /* device to match; NULL matches any */
2211         struct net *net;          /* namespace whose null entry is skipped */
2212 };
2213
2214 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2215 {
2216         const struct arg_dev_net *adn = arg;
2217         const struct net_device *dev = adn->dev;
2218
2219         if ((rt->dst.dev == dev || !dev) &&
2220             rt != adn->net->ipv6.ip6_null_entry)
2221                 return -1;
2222
2223         return 0;
2224 }
2225
2226 void rt6_ifdown(struct net *net, struct net_device *dev)
2227 {
2228         struct arg_dev_net adn = {
2229                 .dev = dev,
2230                 .net = net,
2231         };
2232
2233         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2234         icmp6_clean_all(fib6_ifdown, &adn);
2235 }
2236
/* Argument bundle that rt6_mtu_change() passes to rt6_mtu_change_route(). */
2237 struct rt6_mtu_change_arg {
2238         struct net_device *dev;   /* device whose MTU changed */
2239         unsigned int mtu;         /* the new MTU value */
2240 };
2241
2242 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2243 {
2244         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2245         struct inet6_dev *idev;
2246
2247         /* In IPv6 pmtu discovery is not optional,
2248            so that RTAX_MTU lock cannot disable it.
2249            We still use this lock to block changes
2250            caused by addrconf/ndisc.
2251         */
2252
2253         idev = __in6_dev_get(arg->dev);
2254         if (!idev)
2255                 return 0;
2256
2257         /* For administrative MTU increase, there is no way to discover
2258            IPv6 PMTU increase, so PMTU increase should be updated here.
2259            Since RFC 1981 doesn't include administrative MTU increase
2260            update PMTU increase is a MUST. (i.e. jumbo frame)
2261          */
2262         /*
2263            If new MTU is less than route PMTU, this new MTU will be the
2264            lowest MTU in the path, update the route PMTU to reflect PMTU
2265            decreases; if new MTU is greater than route PMTU, and the
2266            old MTU is the lowest MTU in the path, update the route PMTU
2267            to reflect the increase. In this case if the other nodes' MTU
2268            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2269            PMTU discouvery.
2270          */
2271         if (rt->dst.dev == arg->dev &&
2272             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2273             (dst_mtu(&rt->dst) >= arg->mtu ||
2274              (dst_mtu(&rt->dst) < arg->mtu &&
2275               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2276                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2277         }
2278         return 0;
2279 }
2280
2281 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2282 {
2283         struct rt6_mtu_change_arg arg = {
2284                 .dev = dev,
2285                 .mtu = mtu,
2286         };
2287
2288         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2289 }
2290
2291 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2292         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2293         [RTA_OIF]               = { .type = NLA_U32 },
2294         [RTA_IIF]               = { .type = NLA_U32 },
2295         [RTA_PRIORITY]          = { .type = NLA_U32 },
2296         [RTA_METRICS]           = { .type = NLA_NESTED },
2297         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2298 };
2299
2300 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2301                               struct fib6_config *cfg)
2302 {
2303         struct rtmsg *rtm;
2304         struct nlattr *tb[RTA_MAX+1];
2305         int err;
2306
2307         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2308         if (err < 0)
2309                 goto errout;
2310
2311         err = -EINVAL;
2312         rtm = nlmsg_data(nlh);
2313         memset(cfg, 0, sizeof(*cfg));
2314
2315         cfg->fc_table = rtm->rtm_table;
2316         cfg->fc_dst_len = rtm->rtm_dst_len;
2317         cfg->fc_src_len = rtm->rtm_src_len;
2318         cfg->fc_flags = RTF_UP;
2319         cfg->fc_protocol = rtm->rtm_protocol;
2320         cfg->fc_type = rtm->rtm_type;
2321
2322         if (rtm->rtm_type == RTN_UNREACHABLE ||
2323             rtm->rtm_type == RTN_BLACKHOLE ||
2324             rtm->rtm_type == RTN_PROHIBIT ||
2325             rtm->rtm_type == RTN_THROW)
2326                 cfg->fc_flags |= RTF_REJECT;
2327
2328         if (rtm->rtm_type == RTN_LOCAL)
2329                 cfg->fc_flags |= RTF_LOCAL;
2330
2331         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2332         cfg->fc_nlinfo.nlh = nlh;
2333         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2334
2335         if (tb[RTA_GATEWAY]) {
2336                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2337                 cfg->fc_flags |= RTF_GATEWAY;
2338         }
2339
2340         if (tb[RTA_DST]) {
2341                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2342
2343                 if (nla_len(tb[RTA_DST]) < plen)
2344                         goto errout;
2345
2346                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2347         }
2348
2349         if (tb[RTA_SRC]) {
2350                 int plen = (rtm->rtm_src_len + 7) >> 3;
2351
2352                 if (nla_len(tb[RTA_SRC]) < plen)
2353                         goto errout;
2354
2355                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2356         }
2357
2358         if (tb[RTA_PREFSRC])
2359                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2360
2361         if (tb[RTA_OIF])
2362                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2363
2364         if (tb[RTA_PRIORITY])
2365                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2366
2367         if (tb[RTA_METRICS]) {
2368                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2369                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2370         }
2371
2372         if (tb[RTA_TABLE])
2373                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2374
2375         if (tb[RTA_MULTIPATH]) {
2376                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2377                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2378         }
2379
2380         err = 0;
2381 errout:
2382         return err;
2383 }
2384
/*
 * ip6_route_multipath - add or delete every nexthop of a multipath request.
 *
 * @cfg: request whose fc_mp/fc_mp_len describe the list of rtnexthop entries.
 * @add: non-zero to add each nexthop with ip6_route_add(), zero to delete
 *       each with ip6_route_del().
 *
 * Each nexthop is handled as an independent route: @cfg is copied, the
 * interface index is replaced when the nexthop names one, and an
 * RTA_GATEWAY attribute of the nexthop, if present, replaces the gateway.
 *
 * Returns 0 when every operation succeeded, otherwise the error of the
 * last operation that failed.
 */
2385 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2386 {
2387         struct fib6_config r_cfg;
2388         struct rtnexthop *rtnh;
2389         int remaining;
2390         int attrlen;
2391         int err = 0, last_err = 0;
2392
2393 beginning:
2394         rtnh = (struct rtnexthop *)cfg->fc_mp;
2395         remaining = cfg->fc_mp_len;
2396
2397         /* Parse a Multipath Entry */
2398         while (rtnh_ok(rtnh, remaining)) {
2399                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2400                 if (rtnh->rtnh_ifindex)
2401                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2402
2403                 attrlen = rtnh_attrlen(rtnh);
2404                 if (attrlen > 0) {
2405                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2406
2407                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2408                         if (nla) {
2409                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2410                                 r_cfg.fc_flags |= RTF_GATEWAY;
2411                         }
2412                 }
2413                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2414                 if (err) {
2415                         last_err = err;
2416                         /* If we are trying to remove a route, do not stop the
2417                          * loop when ip6_route_del() fails (because next hop is
2418                          * already gone), we should try to remove all next hops.
2419                          */
2420                         if (add) {
2421                                 /* If add fails, we should try to delete all
2422                                  * next hops that have been already added.
2423                                  */
                                     /* NOTE(review): the restarted pass walks the
                                      * whole list, so it also issues deletes for
                                      * nexthops whose add was never attempted, and
                                      * a failing delete overwrites last_err.
                                      */
2424                                 add = 0;
2425                                 goto beginning;
2426                         }
2427                 }
2428                 /* Because each route is added like a single route we remove
2429                  * this flag after the first nexthop (if there is a collision,
2430                  * we have already fail to add the first nexthop:
2431                  * fib6_add_rt2node() has reject it).
2432                  */
2433                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2434                 rtnh = rtnh_next(rtnh, &remaining);
2435         }
2436
2437         return last_err;
2438 }
2439
2440 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2441 {
2442         struct fib6_config cfg;
2443         int err;
2444
2445         err = rtm_to_fib6_config(skb, nlh, &cfg);
2446         if (err < 0)
2447                 return err;
2448
2449         if (cfg.fc_mp)
2450                 return ip6_route_multipath(&cfg, 0);
2451         else
2452                 return ip6_route_del(&cfg);
2453 }
2454
2455 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2456 {
2457         struct fib6_config cfg;
2458         int err;
2459
2460         err = rtm_to_fib6_config(skb, nlh, &cfg);
2461         if (err < 0)
2462                 return err;
2463
2464         if (cfg.fc_mp)
2465                 return ip6_route_multipath(&cfg, 1);
2466         else
2467                 return ip6_route_add(&cfg);
2468 }
2469
2470 static inline size_t rt6_nlmsg_size(void)
2471 {
2472         return NLMSG_ALIGN(sizeof(struct rtmsg))
2473                + nla_total_size(16) /* RTA_SRC */
2474                + nla_total_size(16) /* RTA_DST */
2475                + nla_total_size(16) /* RTA_GATEWAY */
2476                + nla_total_size(16) /* RTA_PREFSRC */
2477                + nla_total_size(4) /* RTA_TABLE */
2478                + nla_total_size(4) /* RTA_IIF */
2479                + nla_total_size(4) /* RTA_OIF */
2480                + nla_total_size(4) /* RTA_PRIORITY */
2481                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2482                + nla_total_size(sizeof(struct rta_cacheinfo));
2483 }
2484
/*
 * rt6_fill_node - append a netlink route message describing @rt to @skb.
 *
 * @net:    namespace, passed on to ip6mr_get_route()/ip6_route_get_saddr().
 * @skb:    message buffer being filled.
 * @rt:     route to describe.
 * @dst:    if non-NULL, reported as RTA_DST with a prefix length of 128
 *          instead of the route's own destination key.
 * @src:    same for RTA_SRC (only with CONFIG_IPV6_SUBTREES).
 * @iif:    if non-zero, reported as RTA_IIF; with CONFIG_IPV6_MROUTE and a
 *          multicast route destination, ip6mr_get_route() is called instead.
 * @type, @portid, @seq, @flags: passed to nlmsg_put() for the header.
 * @prefix: if non-zero, only routes flagged RTF_PREFIX_RT are emitted.
 * @nowait: passed to ip6mr_get_route() and steers how its errors are handled.
 *
 * Returns 1 when the route was skipped because of @prefix, -EMSGSIZE when
 * the message did not fit (any partial message is cancelled), 0 in one
 * ip6mr_get_route() case below, and otherwise the value of nlmsg_end().
 */
2485 static int rt6_fill_node(struct net *net,
2486                          struct sk_buff *skb, struct rt6_info *rt,
2487                          struct in6_addr *dst, struct in6_addr *src,
2488                          int iif, int type, u32 portid, u32 seq,
2489                          int prefix, int nowait, unsigned int flags)
2490 {
2491         struct rtmsg *rtm;
2492         struct nlmsghdr *nlh;
2493         long expires;
2494         u32 table;
2495         struct neighbour *n;
2496
2497         if (prefix) {   /* user wants prefix routes only */
2498                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2499                         /* success since this is not a prefix route */
2500                         return 1;
2501                 }
2502         }
2503
2504         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2505         if (!nlh)
2506                 return -EMSGSIZE;
2507
2508         rtm = nlmsg_data(nlh);
2509         rtm->rtm_family = AF_INET6;
2510         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2511         rtm->rtm_src_len = rt->rt6i_src.plen;
2512         rtm->rtm_tos = 0;
2513         if (rt->rt6i_table)
2514                 table = rt->rt6i_table->tb6_id;
2515         else
2516                 table = RT6_TABLE_UNSPEC;
             /* The table id goes both into the header field and into RTA_TABLE. */
2517         rtm->rtm_table = table;
2518         if (nla_put_u32(skb, RTA_TABLE, table))
2519                 goto nla_put_failure;
             /* Route type: reject routes are classified by their dst error code. */
2520         if (rt->rt6i_flags & RTF_REJECT) {
2521                 switch (rt->dst.error) {
2522                 case -EINVAL:
2523                         rtm->rtm_type = RTN_BLACKHOLE;
2524                         break;
2525                 case -EACCES:
2526                         rtm->rtm_type = RTN_PROHIBIT;
2527                         break;
2528                 case -EAGAIN:
2529                         rtm->rtm_type = RTN_THROW;
2530                         break;
2531                 default:
2532                         rtm->rtm_type = RTN_UNREACHABLE;
2533                         break;
2534                 }
2535         }
2536         else if (rt->rt6i_flags & RTF_LOCAL)
2537                 rtm->rtm_type = RTN_LOCAL;
2538         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2539                 rtm->rtm_type = RTN_LOCAL;
2540         else
2541                 rtm->rtm_type = RTN_UNICAST;
2542         rtm->rtm_flags = 0;
2543         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
             /* The stored protocol is overridden for dynamic and addrconf routes. */
2544         rtm->rtm_protocol = rt->rt6i_protocol;
2545         if (rt->rt6i_flags & RTF_DYNAMIC)
2546                 rtm->rtm_protocol = RTPROT_REDIRECT;
2547         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2548                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2549                         rtm->rtm_protocol = RTPROT_RA;
2550                 else
2551                         rtm->rtm_protocol = RTPROT_KERNEL;
2552         }
2553
2554         if (rt->rt6i_flags & RTF_CACHE)
2555                 rtm->rtm_flags |= RTM_F_CLONED;
2556
             /* An explicit @dst wins over the route's own key and forces /128. */
2557         if (dst) {
2558                 if (nla_put(skb, RTA_DST, 16, dst))
2559                         goto nla_put_failure;
2560                 rtm->rtm_dst_len = 128;
2561         } else if (rtm->rtm_dst_len)
2562                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2563                         goto nla_put_failure;
2564 #ifdef CONFIG_IPV6_SUBTREES
2565         if (src) {
2566                 if (nla_put(skb, RTA_SRC, 16, src))
2567                         goto nla_put_failure;
2568                 rtm->rtm_src_len = 128;
2569         } else if (rtm->rtm_src_len &&
2570                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2571                 goto nla_put_failure;
2572 #endif
2573         if (iif) {
2574 #ifdef CONFIG_IPV6_MROUTE
2575                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2576                         int err = ip6mr_get_route(net, skb, rtm, nowait);
                             /* err <= 0: with !nowait, 0 ends the fill with a 0
                              * return (the message is not finalised here) and any
                              * error is treated as a put failure; with nowait only
                              * -EMSGSIZE aborts, other values fall through.
                              */
2577                         if (err <= 0) {
2578                                 if (!nowait) {
2579                                         if (err == 0)
2580                                                 return 0;
2581                                         goto nla_put_failure;
2582                                 } else {
2583                                         if (err == -EMSGSIZE)
2584                                                 goto nla_put_failure;
2585                                 }
2586                         }
2587                 } else
2588 #endif
2589                         if (nla_put_u32(skb, RTA_IIF, iif))
2590                                 goto nla_put_failure;
2591         } else if (dst) {
2592                 struct in6_addr saddr_buf;
2593                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2594                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2595                         goto nla_put_failure;
2596         }
2597
             /* NOTE(review): when the branch above already emitted RTA_PREFSRC
              * and the route has a preferred source, the attribute is added a
              * second time here.
              */
2598         if (rt->rt6i_prefsrc.plen) {
2599                 struct in6_addr saddr_buf;
2600                 saddr_buf = rt->rt6i_prefsrc.addr;
2601                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2602                         goto nla_put_failure;
2603         }
2604
2605         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2606                 goto nla_put_failure;
2607
             /* The gateway is taken from the bound neighbour's key, if any. */
2608         n = rt->n;
2609         if (n) {
2610                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2611                         goto nla_put_failure;
2612         }
2613
2614         if (rt->dst.dev &&
2615             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2616                 goto nla_put_failure;
2617         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2618                 goto nla_put_failure;
2619
             /* Time left relative to jiffies; 0 for routes without RTF_EXPIRES. */
2620         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2621
2622         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2623                 goto nla_put_failure;
2624
2625         return nlmsg_end(skb, nlh);
2626
2627 nla_put_failure:
2628         nlmsg_cancel(skb, nlh);
2629         return -EMSGSIZE;
2630 }
2631
2632 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2633 {
2634         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2635         int prefix;
2636
2637         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2638                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2639                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2640         } else
2641                 prefix = 0;
2642
2643         return rt6_fill_node(arg->net,
2644                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2645                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2646                      prefix, 0, NLM_F_MULTI);
2647 }
2648
2649 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2650 {
2651         struct net *net = sock_net(in_skb->sk);
2652         struct nlattr *tb[RTA_MAX+1];
2653         struct rt6_info *rt;
2654         struct sk_buff *skb;
2655         struct rtmsg *rtm;
2656         struct flowi6 fl6;
2657         int err, iif = 0, oif = 0;
2658
2659         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2660         if (err < 0)
2661                 goto errout;
2662
2663         err = -EINVAL;
2664         memset(&fl6, 0, sizeof(fl6));
2665
2666         if (tb[RTA_SRC]) {
2667                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2668                         goto errout;
2669
2670                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2671         }
2672
2673         if (tb[RTA_DST]) {
2674                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2675                         goto errout;
2676
2677                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2678         }
2679
2680         if (tb[RTA_IIF])
2681                 iif = nla_get_u32(tb[RTA_IIF]);
2682
2683         if (tb[RTA_OIF])
2684                 oif = nla_get_u32(tb[RTA_OIF]);
2685
2686         if (iif) {
2687                 struct net_device *dev;
2688                 int flags = 0;
2689
2690                 dev = __dev_get_by_index(net, iif);
2691                 if (!dev) {
2692                         err = -ENODEV;
2693                         goto errout;
2694                 }
2695
2696                 fl6.flowi6_iif = iif;
2697
2698                 if (!ipv6_addr_any(&fl6.saddr))
2699                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2700
2701                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2702                                                                flags);
2703         } else {
2704                 fl6.flowi6_oif = oif;
2705
2706                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2707         }
2708
2709         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2710         if (!skb) {
2711                 ip6_rt_put(rt);
2712                 err = -ENOBUFS;
2713                 goto errout;
2714         }
2715
2716         /* Reserve room for dummy headers, this skb can pass
2717            through good chunk of routing engine.
2718          */
2719         skb_reset_mac_header(skb);
2720         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2721
2722         skb_dst_set(skb, &rt->dst);
2723
2724         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2725                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2726                             nlh->nlmsg_seq, 0, 0, 0);
2727         if (err < 0) {
2728                 kfree_skb(skb);
2729                 goto errout;
2730         }
2731
2732         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2733 errout:
2734         return err;
2735 }
2736
2737 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2738 {
2739         struct sk_buff *skb;
2740         struct net *net = info->nl_net;
2741         u32 seq;
2742         int err;
2743
2744         err = -ENOBUFS;
2745         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2746
2747         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2748         if (!skb)
2749                 goto errout;
2750
2751         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2752                                 event, info->portid, seq, 0, 0, 0);
2753         if (err < 0) {
2754                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2755                 WARN_ON(err == -EMSGSIZE);
2756                 kfree_skb(skb);
2757                 goto errout;
2758         }
2759         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2760                     info->nlh, gfp_any());
2761         return;
2762 errout:
2763         if (err < 0)
2764                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2765 }
2766
2767 static int ip6_route_dev_notify(struct notifier_block *this,
2768                                 unsigned long event, void *data)
2769 {
2770         struct net_device *dev = (struct net_device *)data;
2771         struct net *net = dev_net(dev);
2772
2773         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2774                 net->ipv6.ip6_null_entry->dst.dev = dev;
2775                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2776 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2777                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2778                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2779                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2780                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2781 #endif
2782         }
2783
2784         return NOTIFY_OK;
2785 }
2786
2787 /*
2788  *      /proc
2789  */
2790
2791 #ifdef CONFIG_PROC_FS
2792
/*
 * Buffer bookkeeping for a /proc route dump.
 *
 * NOTE(review): nothing in the seq_file based /proc handlers below refers to
 * this structure; it looks like a leftover from an older buffer-based
 * implementation -- confirm there are no other users before removing it.
 * Field meanings are not established by the surrounding code.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2801
2802 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2803 {
2804         struct seq_file *m = p_arg;
2805         struct neighbour *n;
2806
2807         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2808
2809 #ifdef CONFIG_IPV6_SUBTREES
2810         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2811 #else
2812         seq_puts(m, "00000000000000000000000000000000 00 ");
2813 #endif
2814         n = rt->n;
2815         if (n) {
2816                 seq_printf(m, "%pi6", n->primary_key);
2817         } else {
2818                 seq_puts(m, "00000000000000000000000000000000");
2819         }
2820         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2821                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2822                    rt->dst.__use, rt->rt6i_flags,
2823                    rt->dst.dev ? rt->dst.dev->name : "");
2824         return 0;
2825 }
2826
2827 static int ipv6_route_show(struct seq_file *m, void *v)
2828 {
2829         struct net *net = (struct net *)m->private;
2830         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2831         return 0;
2832 }
2833
2834 static int ipv6_route_open(struct inode *inode, struct file *file)
2835 {
2836         return single_open_net(inode, file, ipv6_route_show);
2837 }
2838
2839 static const struct file_operations ipv6_route_proc_fops = {
2840         .owner          = THIS_MODULE,
2841         .open           = ipv6_route_open,
2842         .read           = seq_read,
2843         .llseek         = seq_lseek,
2844         .release        = single_release_net,
2845 };
2846
2847 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2848 {
2849         struct net *net = (struct net *)seq->private;
2850         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2851                    net->ipv6.rt6_stats->fib_nodes,
2852                    net->ipv6.rt6_stats->fib_route_nodes,
2853                    net->ipv6.rt6_stats->fib_rt_alloc,
2854                    net->ipv6.rt6_stats->fib_rt_entries,
2855                    net->ipv6.rt6_stats->fib_rt_cache,
2856                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2857                    net->ipv6.rt6_stats->fib_discarded_routes);
2858
2859         return 0;
2860 }
2861
2862 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2863 {
2864         return single_open_net(inode, file, rt6_stats_seq_show);
2865 }
2866
2867 static const struct file_operations rt6_stats_seq_fops = {
2868         .owner   = THIS_MODULE,
2869         .open    = rt6_stats_seq_open,
2870         .read    = seq_read,
2871         .llseek  = seq_lseek,
2872         .release = single_release_net,
2873 };
2874 #endif  /* CONFIG_PROC_FS */
2875
2876 #ifdef CONFIG_SYSCTL
2877
2878 static
2879 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2880                               void __user *buffer, size_t *lenp, loff_t *ppos)
2881 {
2882         struct net *net;
2883         int delay;
2884         if (!write)
2885                 return -EINVAL;
2886
2887         net = (struct net *)ctl->extra1;
2888         delay = net->ipv6.sysctl.flush_delay;
2889         proc_dointvec(ctl, write, buffer, lenp, ppos);
2890         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2891         return 0;
2892 }
2893
2894 ctl_table ipv6_route_table_template[] = {
2895         {
2896                 .procname       =       "flush",
2897                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2898                 .maxlen         =       sizeof(int),
2899                 .mode           =       0200,
2900                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2901         },
2902         {
2903                 .procname       =       "gc_thresh",
2904                 .data           =       &ip6_dst_ops_template.gc_thresh,
2905                 .maxlen         =       sizeof(int),
2906                 .mode           =       0644,
2907                 .proc_handler   =       proc_dointvec,
2908         },
2909         {
2910                 .procname       =       "max_size",
2911                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2912                 .maxlen         =       sizeof(int),
2913                 .mode           =       0644,
2914                 .proc_handler   =       proc_dointvec,
2915         },
2916         {
2917                 .procname       =       "gc_min_interval",
2918                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2919                 .maxlen         =       sizeof(int),
2920                 .mode           =       0644,
2921                 .proc_handler   =       proc_dointvec_jiffies,
2922         },
2923         {
2924                 .procname       =       "gc_timeout",
2925                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2926                 .maxlen         =       sizeof(int),
2927                 .mode           =       0644,
2928                 .proc_handler   =       proc_dointvec_jiffies,
2929         },
2930         {
2931                 .procname       =       "gc_interval",
2932                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2933                 .maxlen         =       sizeof(int),
2934                 .mode           =       0644,
2935                 .proc_handler   =       proc_dointvec_jiffies,
2936         },
2937         {
2938                 .procname       =       "gc_elasticity",
2939                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2940                 .maxlen         =       sizeof(int),
2941                 .mode           =       0644,
2942                 .proc_handler   =       proc_dointvec,
2943         },
2944         {
2945                 .procname       =       "mtu_expires",
2946                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2947                 .maxlen         =       sizeof(int),
2948                 .mode           =       0644,
2949                 .proc_handler   =       proc_dointvec_jiffies,
2950         },
2951         {
2952                 .procname       =       "min_adv_mss",
2953                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2954                 .maxlen         =       sizeof(int),
2955                 .mode           =       0644,
2956                 .proc_handler   =       proc_dointvec,
2957         },
2958         {
2959                 .procname       =       "gc_min_interval_ms",
2960                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2961                 .maxlen         =       sizeof(int),
2962                 .mode           =       0644,
2963                 .proc_handler   =       proc_dointvec_ms_jiffies,
2964         },
2965         { }
2966 };
2967
2968 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2969 {
2970         struct ctl_table *table;
2971
2972         table = kmemdup(ipv6_route_table_template,
2973                         sizeof(ipv6_route_table_template),
2974                         GFP_KERNEL);
2975
2976         if (table) {
2977                 table[0].data = &net->ipv6.sysctl.flush_delay;
2978                 table[0].extra1 = net;
2979                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2980                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2981                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2982                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2983                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2984                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2985                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2986                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2987                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2988
2989                 /* Don't export sysctls to unprivileged users */
2990                 if (net->user_ns != &init_user_ns)
2991                         table[0].procname = NULL;
2992         }
2993
2994         return table;
2995 }
2996 #endif
2997
2998 static int __net_init ip6_route_net_init(struct net *net)
2999 {
3000         int ret = -ENOMEM;
3001
3002         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3003                sizeof(net->ipv6.ip6_dst_ops));
3004
3005         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3006                 goto out_ip6_dst_ops;
3007
3008         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3009                                            sizeof(*net->ipv6.ip6_null_entry),
3010                                            GFP_KERNEL);
3011         if (!net->ipv6.ip6_null_entry)
3012                 goto out_ip6_dst_entries;
3013         net->ipv6.ip6_null_entry->dst.path =
3014                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3015         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3016         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3017                          ip6_template_metrics, true);
3018
3019 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3020         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3021                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3022                                                GFP_KERNEL);
3023         if (!net->ipv6.ip6_prohibit_entry)
3024                 goto out_ip6_null_entry;
3025         net->ipv6.ip6_prohibit_entry->dst.path =
3026                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3027         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3028         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3029                          ip6_template_metrics, true);
3030
3031         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3032                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3033                                                GFP_KERNEL);
3034         if (!net->ipv6.ip6_blk_hole_entry)
3035                 goto out_ip6_prohibit_entry;
3036         net->ipv6.ip6_blk_hole_entry->dst.path =
3037                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3038         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3039         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3040                          ip6_template_metrics, true);
3041 #endif
3042
3043         net->ipv6.sysctl.flush_delay = 0;
3044         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3045         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3046         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3047         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3048         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3049         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3050         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3051
3052         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3053
3054         ret = 0;
3055 out:
3056         return ret;
3057
3058 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3059 out_ip6_prohibit_entry:
3060         kfree(net->ipv6.ip6_prohibit_entry);
3061 out_ip6_null_entry:
3062         kfree(net->ipv6.ip6_null_entry);
3063 #endif
3064 out_ip6_dst_entries:
3065         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3066 out_ip6_dst_ops:
3067         goto out;
3068 }
3069
3070 static void __net_exit ip6_route_net_exit(struct net *net)
3071 {
3072         kfree(net->ipv6.ip6_null_entry);
3073 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3074         kfree(net->ipv6.ip6_prohibit_entry);
3075         kfree(net->ipv6.ip6_blk_hole_entry);
3076 #endif
3077         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3078 }
3079
3080 static int __net_init ip6_route_net_init_late(struct net *net)
3081 {
3082 #ifdef CONFIG_PROC_FS
3083         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3084         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3085 #endif
3086         return 0;
3087 }
3088
3089 static void __net_exit ip6_route_net_exit_late(struct net *net)
3090 {
3091 #ifdef CONFIG_PROC_FS
3092         proc_net_remove(net, "ipv6_route");
3093         proc_net_remove(net, "rt6_stats");
3094 #endif
3095 }
3096
3097 static struct pernet_operations ip6_route_net_ops = {
3098         .init = ip6_route_net_init,
3099         .exit = ip6_route_net_exit,
3100 };
3101
3102 static int __net_init ipv6_inetpeer_init(struct net *net)
3103 {
3104         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3105
3106         if (!bp)
3107                 return -ENOMEM;
3108         inet_peer_base_init(bp);
3109         net->ipv6.peers = bp;
3110         return 0;
3111 }
3112
3113 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3114 {
3115         struct inet_peer_base *bp = net->ipv6.peers;
3116
3117         net->ipv6.peers = NULL;
3118         inetpeer_invalidate_tree(bp);
3119         kfree(bp);
3120 }
3121
3122 static struct pernet_operations ipv6_inetpeer_ops = {
3123         .init   =       ipv6_inetpeer_init,
3124         .exit   =       ipv6_inetpeer_exit,
3125 };
3126
3127 static struct pernet_operations ip6_route_net_late_ops = {
3128         .init = ip6_route_net_init_late,
3129         .exit = ip6_route_net_exit_late,
3130 };
3131
3132 static struct notifier_block ip6_route_dev_notifier = {
3133         .notifier_call = ip6_route_dev_notify,
3134         .priority = 0,
3135 };
3136
3137 int __init ip6_route_init(void)
3138 {
3139         int ret;
3140
3141         ret = -ENOMEM;
3142         ip6_dst_ops_template.kmem_cachep =
3143                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3144                                   SLAB_HWCACHE_ALIGN, NULL);
3145         if (!ip6_dst_ops_template.kmem_cachep)
3146                 goto out;
3147
3148         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3149         if (ret)
3150                 goto out_kmem_cache;
3151
3152         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3153         if (ret)
3154                 goto out_dst_entries;
3155
3156         ret = register_pernet_subsys(&ip6_route_net_ops);
3157         if (ret)
3158                 goto out_register_inetpeer;
3159
3160         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3161
3162         /* Registering of the loopback is done before this portion of code,
3163          * the loopback reference in rt6_info will not be taken, do it
3164          * manually for init_net */
3165         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3166         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3167   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3168         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3169         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3170         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3171         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3172   #endif
3173         ret = fib6_init();
3174         if (ret)
3175                 goto out_register_subsys;
3176
3177         ret = xfrm6_init();
3178         if (ret)
3179                 goto out_fib6_init;
3180
3181         ret = fib6_rules_init();
3182         if (ret)
3183                 goto xfrm6_init;
3184
3185         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3186         if (ret)
3187                 goto fib6_rules_init;
3188
3189         ret = -ENOBUFS;
3190         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3191             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3192             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3193                 goto out_register_late_subsys;
3194
3195         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3196         if (ret)
3197                 goto out_register_late_subsys;
3198
3199 out:
3200         return ret;
3201
3202 out_register_late_subsys:
3203         unregister_pernet_subsys(&ip6_route_net_late_ops);
3204 fib6_rules_init:
3205         fib6_rules_cleanup();
3206 xfrm6_init:
3207         xfrm6_fini();
3208 out_fib6_init:
3209         fib6_gc_cleanup();
3210 out_register_subsys:
3211         unregister_pernet_subsys(&ip6_route_net_ops);
3212 out_register_inetpeer:
3213         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3214 out_dst_entries:
3215         dst_entries_destroy(&ip6_dst_blackhole_ops);
3216 out_kmem_cache:
3217         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3218         goto out;
3219 }
3220
3221 void ip6_route_cleanup(void)
3222 {
3223         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3224         unregister_pernet_subsys(&ip6_route_net_late_ops);
3225         fib6_rules_cleanup();
3226         xfrm6_fini();
3227         fib6_gc_cleanup();
3228         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3229         unregister_pernet_subsys(&ip6_route_net_ops);
3230         dst_entries_destroy(&ip6_dst_blackhole_ops);
3231         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3232 }