Merge branch 'for-john' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi...
[cascardo/linux.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Result of the neighbour check done while scoring a route.  All failure
 * codes are negative so callers can test "< 0".
 */
enum rt6_nud_state {
	RT6_NUD_SUCCEED		=  1,	/* next hop accepted */
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in NUD_FAILED */
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() skips the route */
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76                                     const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int      ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void             ip6_dst_destroy(struct dst_entry *);
82 static void             ip6_dst_ifdown(struct dst_entry *,
83                                        struct net_device *dev, int how);
84 static int               ip6_dst_gc(struct dst_ops *ops);
85
86 static int              ip6_pkt_discard(struct sk_buff *skb);
87 static int              ip6_pkt_discard_out(struct sk_buff *skb);
88 static int              ip6_pkt_prohibit(struct sk_buff *skb);
89 static int              ip6_pkt_prohibit_out(struct sk_buff *skb);
90 static void             ip6_link_failure(struct sk_buff *skb);
91 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92                                            struct sk_buff *skb, u32 mtu);
93 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94                                         struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
97 #ifdef CONFIG_IPV6_ROUTE_INFO
98 static struct rt6_info *rt6_add_route_info(struct net *net,
99                                            const struct in6_addr *prefix, int prefixlen,
100                                            const struct in6_addr *gwaddr, int ifindex,
101                                            unsigned int pref);
102 static struct rt6_info *rt6_get_route_info(struct net *net,
103                                            const struct in6_addr *prefix, int prefixlen,
104                                            const struct in6_addr *gwaddr, int ifindex);
105 #endif
106
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109         struct inet_peer_base *base;
110         struct inet_peer *peer;
111
112         base = inetpeer_base_ptr(rt->_rt6i_peer);
113         if (!base)
114                 return;
115
116         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117         if (peer) {
118                 if (!rt6_set_peer(rt, peer))
119                         inet_putpeer(peer);
120         }
121 }
122
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125         if (rt6_has_peer(rt))
126                 return rt6_peer_ptr(rt);
127
128         rt6_bind_peer(rt, create);
129         return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131
/* Convenience wrapper: __rt6_get_peer() with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139         struct rt6_info *rt = (struct rt6_info *) dst;
140         struct inet_peer *peer;
141         u32 *p = NULL;
142
143         if (!(rt->dst.flags & DST_HOST))
144                 return NULL;
145
146         peer = rt6_get_peer_create(rt);
147         if (peer) {
148                 u32 *old_p = __DST_METRICS_PTR(old);
149                 unsigned long prev, new;
150
151                 p = peer->metrics;
152                 if (inet_metrics_new(peer) ||
153                     (old & DST_METRICS_FORCE_OVERWRITE))
154                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156                 new = (unsigned long) p;
157                 prev = cmpxchg(&dst->_metrics, old, new);
158
159                 if (prev != old) {
160                         p = __DST_METRICS_PTR(prev);
161                         if (prev & DST_METRICS_READ_ONLY)
162                                 p = NULL;
163                 }
164         }
165         return p;
166 }
167
168 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169                                              struct sk_buff *skb,
170                                              const void *daddr)
171 {
172         struct in6_addr *p = &rt->rt6i_gateway;
173
174         if (!ipv6_addr_any(p))
175                 return (const void *) p;
176         else if (skb)
177                 return &ipv6_hdr(skb)->daddr;
178         return daddr;
179 }
180
181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182                                           struct sk_buff *skb,
183                                           const void *daddr)
184 {
185         struct rt6_info *rt = (struct rt6_info *) dst;
186         struct neighbour *n;
187
188         daddr = choose_neigh_daddr(rt, skb, daddr);
189         n = __ipv6_neigh_lookup(dst->dev, daddr);
190         if (n)
191                 return n;
192         return neigh_create(&nd_tbl, daddr, dst->dev);
193 }
194
195 static struct dst_ops ip6_dst_ops_template = {
196         .family                 =       AF_INET6,
197         .protocol               =       cpu_to_be16(ETH_P_IPV6),
198         .gc                     =       ip6_dst_gc,
199         .gc_thresh              =       1024,
200         .check                  =       ip6_dst_check,
201         .default_advmss         =       ip6_default_advmss,
202         .mtu                    =       ip6_mtu,
203         .cow_metrics            =       ipv6_cow_metrics,
204         .destroy                =       ip6_dst_destroy,
205         .ifdown                 =       ip6_dst_ifdown,
206         .negative_advice        =       ip6_negative_advice,
207         .link_failure           =       ip6_link_failure,
208         .update_pmtu            =       ip6_rt_update_pmtu,
209         .redirect               =       rt6_do_redirect,
210         .local_out              =       __ip6_local_out,
211         .neigh_lookup           =       ip6_neigh_lookup,
212 };
213
214 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
215 {
216         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
217
218         return mtu ? : dst->dev->mtu;
219 }
220
221 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
222                                          struct sk_buff *skb, u32 mtu)
223 {
224 }
225
/* dst_ops->redirect handler for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
230
231 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
232                                          unsigned long old)
233 {
234         return NULL;
235 }
236
237 static struct dst_ops ip6_dst_blackhole_ops = {
238         .family                 =       AF_INET6,
239         .protocol               =       cpu_to_be16(ETH_P_IPV6),
240         .destroy                =       ip6_dst_destroy,
241         .check                  =       ip6_dst_check,
242         .mtu                    =       ip6_blackhole_mtu,
243         .default_advmss         =       ip6_default_advmss,
244         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
245         .redirect               =       ip6_rt_blackhole_redirect,
246         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
247         .neigh_lookup           =       ip6_neigh_lookup,
248 };
249
250 static const u32 ip6_template_metrics[RTAX_MAX] = {
251         [RTAX_HOPLIMIT - 1] = 0,
252 };
253
254 static const struct rt6_info ip6_null_entry_template = {
255         .dst = {
256                 .__refcnt       = ATOMIC_INIT(1),
257                 .__use          = 1,
258                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
259                 .error          = -ENETUNREACH,
260                 .input          = ip6_pkt_discard,
261                 .output         = ip6_pkt_discard_out,
262         },
263         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
264         .rt6i_protocol  = RTPROT_KERNEL,
265         .rt6i_metric    = ~(u32) 0,
266         .rt6i_ref       = ATOMIC_INIT(1),
267 };
268
269 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
270
271 static const struct rt6_info ip6_prohibit_entry_template = {
272         .dst = {
273                 .__refcnt       = ATOMIC_INIT(1),
274                 .__use          = 1,
275                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
276                 .error          = -EACCES,
277                 .input          = ip6_pkt_prohibit,
278                 .output         = ip6_pkt_prohibit_out,
279         },
280         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
281         .rt6i_protocol  = RTPROT_KERNEL,
282         .rt6i_metric    = ~(u32) 0,
283         .rt6i_ref       = ATOMIC_INIT(1),
284 };
285
286 static const struct rt6_info ip6_blk_hole_entry_template = {
287         .dst = {
288                 .__refcnt       = ATOMIC_INIT(1),
289                 .__use          = 1,
290                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
291                 .error          = -EINVAL,
292                 .input          = dst_discard,
293                 .output         = dst_discard,
294         },
295         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
296         .rt6i_protocol  = RTPROT_KERNEL,
297         .rt6i_metric    = ~(u32) 0,
298         .rt6i_ref       = ATOMIC_INIT(1),
299 };
300
301 #endif
302
303 /* allocate dst with ip6_dst_ops */
304 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
305                                              struct net_device *dev,
306                                              int flags,
307                                              struct fib6_table *table)
308 {
309         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
310                                         0, DST_OBSOLETE_FORCE_CHK, flags);
311
312         if (rt) {
313                 struct dst_entry *dst = &rt->dst;
314
315                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
316                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
317                 rt->rt6i_genid = rt_genid_ipv6(net);
318                 INIT_LIST_HEAD(&rt->rt6i_siblings);
319         }
320         return rt;
321 }
322
323 static void ip6_dst_destroy(struct dst_entry *dst)
324 {
325         struct rt6_info *rt = (struct rt6_info *)dst;
326         struct inet6_dev *idev = rt->rt6i_idev;
327         struct dst_entry *from = dst->from;
328
329         if (!(rt->dst.flags & DST_HOST))
330                 dst_destroy_metrics_generic(dst);
331
332         if (idev) {
333                 rt->rt6i_idev = NULL;
334                 in6_dev_put(idev);
335         }
336
337         dst->from = NULL;
338         dst_release(from);
339
340         if (rt6_has_peer(rt)) {
341                 struct inet_peer *peer = rt6_peer_ptr(rt);
342                 inet_putpeer(peer);
343         }
344 }
345
346 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
347                            int how)
348 {
349         struct rt6_info *rt = (struct rt6_info *)dst;
350         struct inet6_dev *idev = rt->rt6i_idev;
351         struct net_device *loopback_dev =
352                 dev_net(dev)->loopback_dev;
353
354         if (dev != loopback_dev) {
355                 if (idev && idev->dev == dev) {
356                         struct inet6_dev *loopback_idev =
357                                 in6_dev_get(loopback_dev);
358                         if (loopback_idev) {
359                                 rt->rt6i_idev = loopback_idev;
360                                 in6_dev_put(idev);
361                         }
362                 }
363         }
364 }
365
366 static bool rt6_check_expired(const struct rt6_info *rt)
367 {
368         if (rt->rt6i_flags & RTF_EXPIRES) {
369                 if (time_after(jiffies, rt->dst.expires))
370                         return true;
371         } else if (rt->dst.from) {
372                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
373         }
374         return false;
375 }
376
377 /* Multipath route selection:
378  *   Hash based function using packet header and flowlabel.
379  * Adapted from fib_info_hashfn()
380  */
381 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
382                                const struct flowi6 *fl6)
383 {
384         unsigned int val = fl6->flowi6_proto;
385
386         val ^= ipv6_addr_hash(&fl6->daddr);
387         val ^= ipv6_addr_hash(&fl6->saddr);
388
389         /* Work only if this not encapsulated */
390         switch (fl6->flowi6_proto) {
391         case IPPROTO_UDP:
392         case IPPROTO_TCP:
393         case IPPROTO_SCTP:
394                 val ^= (__force u16)fl6->fl6_sport;
395                 val ^= (__force u16)fl6->fl6_dport;
396                 break;
397
398         case IPPROTO_ICMPV6:
399                 val ^= (__force u16)fl6->fl6_icmp_type;
400                 val ^= (__force u16)fl6->fl6_icmp_code;
401                 break;
402         }
403         /* RFC6438 recommands to use flowlabel */
404         val ^= (__force u32)fl6->flowlabel;
405
406         /* Perhaps, we need to tune, this function? */
407         val = val ^ (val >> 7) ^ (val >> 12);
408         return val % candidate_count;
409 }
410
411 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
412                                              struct flowi6 *fl6, int oif,
413                                              int strict)
414 {
415         struct rt6_info *sibling, *next_sibling;
416         int route_choosen;
417
418         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
419         /* Don't change the route, if route_choosen == 0
420          * (siblings does not include ourself)
421          */
422         if (route_choosen)
423                 list_for_each_entry_safe(sibling, next_sibling,
424                                 &match->rt6i_siblings, rt6i_siblings) {
425                         route_choosen--;
426                         if (route_choosen == 0) {
427                                 if (rt6_score_route(sibling, oif, strict) < 0)
428                                         break;
429                                 match = sibling;
430                                 break;
431                         }
432                 }
433         return match;
434 }
435
436 /*
437  *      Route lookup. Any table->tb6_lock is implied.
438  */
439
440 static inline struct rt6_info *rt6_device_match(struct net *net,
441                                                     struct rt6_info *rt,
442                                                     const struct in6_addr *saddr,
443                                                     int oif,
444                                                     int flags)
445 {
446         struct rt6_info *local = NULL;
447         struct rt6_info *sprt;
448
449         if (!oif && ipv6_addr_any(saddr))
450                 goto out;
451
452         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
453                 struct net_device *dev = sprt->dst.dev;
454
455                 if (oif) {
456                         if (dev->ifindex == oif)
457                                 return sprt;
458                         if (dev->flags & IFF_LOOPBACK) {
459                                 if (!sprt->rt6i_idev ||
460                                     sprt->rt6i_idev->dev->ifindex != oif) {
461                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
462                                                 continue;
463                                         if (local && (!oif ||
464                                                       local->rt6i_idev->dev->ifindex == oif))
465                                                 continue;
466                                 }
467                                 local = sprt;
468                         }
469                 } else {
470                         if (ipv6_chk_addr(net, saddr, dev,
471                                           flags & RT6_LOOKUP_F_IFACE))
472                                 return sprt;
473                 }
474         }
475
476         if (oif) {
477                 if (local)
478                         return local;
479
480                 if (flags & RT6_LOOKUP_F_IFACE)
481                         return net->ipv6.ip6_null_entry;
482         }
483 out:
484         return rt;
485 }
486
487 #ifdef CONFIG_IPV6_ROUTER_PREF
488 struct __rt6_probe_work {
489         struct work_struct work;
490         struct in6_addr target;
491         struct net_device *dev;
492 };
493
494 static void rt6_probe_deferred(struct work_struct *w)
495 {
496         struct in6_addr mcaddr;
497         struct __rt6_probe_work *work =
498                 container_of(w, struct __rt6_probe_work, work);
499
500         addrconf_addr_solict_mult(&work->target, &mcaddr);
501         ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
502         dev_put(work->dev);
503         kfree(w);
504 }
505
506 static void rt6_probe(struct rt6_info *rt)
507 {
508         struct neighbour *neigh;
509         /*
510          * Okay, this does not seem to be appropriate
511          * for now, however, we need to check if it
512          * is really so; aka Router Reachability Probing.
513          *
514          * Router Reachability Probe MUST be rate-limited
515          * to no more than one per minute.
516          */
517         if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
518                 return;
519         rcu_read_lock_bh();
520         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
521         if (neigh) {
522                 write_lock(&neigh->lock);
523                 if (neigh->nud_state & NUD_VALID)
524                         goto out;
525         }
526
527         if (!neigh ||
528             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
529                 struct __rt6_probe_work *work;
530
531                 work = kmalloc(sizeof(*work), GFP_ATOMIC);
532
533                 if (neigh && work)
534                         __neigh_set_probe_once(neigh);
535
536                 if (neigh)
537                         write_unlock(&neigh->lock);
538
539                 if (work) {
540                         INIT_WORK(&work->work, rt6_probe_deferred);
541                         work->target = rt->rt6i_gateway;
542                         dev_hold(rt->dst.dev);
543                         work->dev = rt->dst.dev;
544                         schedule_work(&work->work);
545                 }
546         } else {
547 out:
548                 write_unlock(&neigh->lock);
549         }
550         rcu_read_unlock_bh();
551 }
552 #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt) { }
556 #endif
557
558 /*
559  * Default Router Selection (RFC 2461 6.3.6)
560  */
561 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
562 {
563         struct net_device *dev = rt->dst.dev;
564         if (!oif || dev->ifindex == oif)
565                 return 2;
566         if ((dev->flags & IFF_LOOPBACK) &&
567             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
568                 return 1;
569         return 0;
570 }
571
572 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
573 {
574         struct neighbour *neigh;
575         enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
576
577         if (rt->rt6i_flags & RTF_NONEXTHOP ||
578             !(rt->rt6i_flags & RTF_GATEWAY))
579                 return RT6_NUD_SUCCEED;
580
581         rcu_read_lock_bh();
582         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
583         if (neigh) {
584                 read_lock(&neigh->lock);
585                 if (neigh->nud_state & NUD_VALID)
586                         ret = RT6_NUD_SUCCEED;
587 #ifdef CONFIG_IPV6_ROUTER_PREF
588                 else if (!(neigh->nud_state & NUD_FAILED))
589                         ret = RT6_NUD_SUCCEED;
590                 else
591                         ret = RT6_NUD_FAIL_PROBE;
592 #endif
593                 read_unlock(&neigh->lock);
594         } else {
595                 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
596                       RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
597         }
598         rcu_read_unlock_bh();
599
600         return ret;
601 }
602
603 static int rt6_score_route(struct rt6_info *rt, int oif,
604                            int strict)
605 {
606         int m;
607
608         m = rt6_check_dev(rt, oif);
609         if (!m && (strict & RT6_LOOKUP_F_IFACE))
610                 return RT6_NUD_FAIL_HARD;
611 #ifdef CONFIG_IPV6_ROUTER_PREF
612         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
613 #endif
614         if (strict & RT6_LOOKUP_F_REACHABLE) {
615                 int n = rt6_check_neigh(rt);
616                 if (n < 0)
617                         return n;
618         }
619         return m;
620 }
621
622 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
623                                    int *mpri, struct rt6_info *match,
624                                    bool *do_rr)
625 {
626         int m;
627         bool match_do_rr = false;
628
629         if (rt6_check_expired(rt))
630                 goto out;
631
632         m = rt6_score_route(rt, oif, strict);
633         if (m == RT6_NUD_FAIL_DO_RR) {
634                 match_do_rr = true;
635                 m = 0; /* lowest valid score */
636         } else if (m == RT6_NUD_FAIL_HARD) {
637                 goto out;
638         }
639
640         if (strict & RT6_LOOKUP_F_REACHABLE)
641                 rt6_probe(rt);
642
643         /* note that m can be RT6_NUD_FAIL_PROBE at this point */
644         if (m > *mpri) {
645                 *do_rr = match_do_rr;
646                 *mpri = m;
647                 match = rt;
648         }
649 out:
650         return match;
651 }
652
653 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
654                                      struct rt6_info *rr_head,
655                                      u32 metric, int oif, int strict,
656                                      bool *do_rr)
657 {
658         struct rt6_info *rt, *match;
659         int mpri = -1;
660
661         match = NULL;
662         for (rt = rr_head; rt && rt->rt6i_metric == metric;
663              rt = rt->dst.rt6_next)
664                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
665         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
666              rt = rt->dst.rt6_next)
667                 match = find_match(rt, oif, strict, &mpri, match, do_rr);
668
669         return match;
670 }
671
672 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
673 {
674         struct rt6_info *match, *rt0;
675         struct net *net;
676         bool do_rr = false;
677
678         rt0 = fn->rr_ptr;
679         if (!rt0)
680                 fn->rr_ptr = rt0 = fn->leaf;
681
682         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
683                              &do_rr);
684
685         if (do_rr) {
686                 struct rt6_info *next = rt0->dst.rt6_next;
687
688                 /* no entries matched; do round-robin */
689                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
690                         next = fn->leaf;
691
692                 if (next != rt0)
693                         fn->rr_ptr = next;
694         }
695
696         net = dev_net(rt0->dst.dev);
697         return match ? match : net->ipv6.ip6_null_entry;
698 }
699
700 #ifdef CONFIG_IPV6_ROUTE_INFO
701 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
702                   const struct in6_addr *gwaddr)
703 {
704         struct net *net = dev_net(dev);
705         struct route_info *rinfo = (struct route_info *) opt;
706         struct in6_addr prefix_buf, *prefix;
707         unsigned int pref;
708         unsigned long lifetime;
709         struct rt6_info *rt;
710
711         if (len < sizeof(struct route_info)) {
712                 return -EINVAL;
713         }
714
715         /* Sanity check for prefix_len and length */
716         if (rinfo->length > 3) {
717                 return -EINVAL;
718         } else if (rinfo->prefix_len > 128) {
719                 return -EINVAL;
720         } else if (rinfo->prefix_len > 64) {
721                 if (rinfo->length < 2) {
722                         return -EINVAL;
723                 }
724         } else if (rinfo->prefix_len > 0) {
725                 if (rinfo->length < 1) {
726                         return -EINVAL;
727                 }
728         }
729
730         pref = rinfo->route_pref;
731         if (pref == ICMPV6_ROUTER_PREF_INVALID)
732                 return -EINVAL;
733
734         lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
735
736         if (rinfo->length == 3)
737                 prefix = (struct in6_addr *)rinfo->prefix;
738         else {
739                 /* this function is safe */
740                 ipv6_addr_prefix(&prefix_buf,
741                                  (struct in6_addr *)rinfo->prefix,
742                                  rinfo->prefix_len);
743                 prefix = &prefix_buf;
744         }
745
746         if (rinfo->prefix_len == 0)
747                 rt = rt6_get_dflt_router(gwaddr, dev);
748         else
749                 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
750                                         gwaddr, dev->ifindex);
751
752         if (rt && !lifetime) {
753                 ip6_del_rt(rt);
754                 rt = NULL;
755         }
756
757         if (!rt && lifetime)
758                 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
759                                         pref);
760         else if (rt)
761                 rt->rt6i_flags = RTF_ROUTEINFO |
762                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
763
764         if (rt) {
765                 if (!addrconf_finite_timeout(lifetime))
766                         rt6_clean_expires(rt);
767                 else
768                         rt6_set_expires(rt, jiffies + HZ * lifetime);
769
770                 ip6_rt_put(rt);
771         }
772         return 0;
773 }
774 #endif
775
/*
 * Climb the fib6 tree when the current lookup ended on the null entry.
 *
 * Uses the caller's local variables 'rt' and 'fn' and jumps to the
 * caller's labels 'restart' (a node carrying RTN_RTINFO was found) and
 * 'out' (the top-level root was reached).  When the parent has a source
 * subtree other than the current node, the lookup continues in that
 * subtree using @saddr.  Macro arguments are parenthesised so that any
 * expression may be passed.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
793
794 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
795                                              struct fib6_table *table,
796                                              struct flowi6 *fl6, int flags)
797 {
798         struct fib6_node *fn;
799         struct rt6_info *rt;
800
801         read_lock_bh(&table->tb6_lock);
802         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
803 restart:
804         rt = fn->leaf;
805         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
806         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
807                 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
808         BACKTRACK(net, &fl6->saddr);
809 out:
810         dst_use(&rt->dst, jiffies);
811         read_unlock_bh(&table->tb6_lock);
812         return rt;
813
814 }
815
816 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
817                                     int flags)
818 {
819         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
820 }
821 EXPORT_SYMBOL_GPL(ip6_route_lookup);
822
823 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
824                             const struct in6_addr *saddr, int oif, int strict)
825 {
826         struct flowi6 fl6 = {
827                 .flowi6_oif = oif,
828                 .daddr = *daddr,
829         };
830         struct dst_entry *dst;
831         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
832
833         if (saddr) {
834                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
835                 flags |= RT6_LOOKUP_F_HAS_SADDR;
836         }
837
838         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
839         if (dst->error == 0)
840                 return (struct rt6_info *) dst;
841
842         dst_release(dst);
843
844         return NULL;
845 }
846
847 EXPORT_SYMBOL(rt6_lookup);
848
849 /* ip6_ins_rt is called with FREE table->tb6_lock.
850    It takes new route entry, the addition fails by any reason the
851    route is freed. In any case, if caller does not hold it, it may
852    be destroyed.
853  */
854
855 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
856                         struct nlattr *mx, int mx_len)
857 {
858         int err;
859         struct fib6_table *table;
860
861         table = rt->rt6i_table;
862         write_lock_bh(&table->tb6_lock);
863         err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);
864         write_unlock_bh(&table->tb6_lock);
865
866         return err;
867 }
868
869 int ip6_ins_rt(struct rt6_info *rt)
870 {
871         struct nl_info info = {
872                 .nl_net = dev_net(rt->dst.dev),
873         };
874         return __ip6_ins_rt(rt, &info, NULL, 0);
875 }
876
877 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
878                                       const struct in6_addr *daddr,
879                                       const struct in6_addr *saddr)
880 {
881         struct rt6_info *rt;
882
883         /*
884          *      Clone the route.
885          */
886
887         rt = ip6_rt_copy(ort, daddr);
888
889         if (rt) {
890                 if (ort->rt6i_dst.plen != 128 &&
891                     ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
892                         rt->rt6i_flags |= RTF_ANYCAST;
893
894                 rt->rt6i_flags |= RTF_CACHE;
895
896 #ifdef CONFIG_IPV6_SUBTREES
897                 if (rt->rt6i_src.plen && saddr) {
898                         rt->rt6i_src.addr = *saddr;
899                         rt->rt6i_src.plen = 128;
900                 }
901 #endif
902         }
903
904         return rt;
905 }
906
907 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
908                                         const struct in6_addr *daddr)
909 {
910         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
911
912         if (rt)
913                 rt->rt6i_flags |= RTF_CACHE;
914         return rt;
915 }
916
/*
 * Resolve the flow @fl6 in @table for interface @oif and return a route
 * with a reference held (every return path passes a dst_hold()).
 *
 * The first pass asks rt6_select() for a reachable router
 * (RT6_LOOKUP_F_REACHABLE) unless devconf_all->forwarding is set; @flags
 * contributes only RT6_LOOKUP_F_IFACE ("strict").
 *
 * A selected route that is neither the null entry nor already RTF_CACHE is
 * copied (rt6_alloc_cow() or rt6_alloc_clone()) and the copy is inserted
 * with ip6_ins_rt().  Because the table lock is dropped around that step,
 * a failed insert is retried from scratch, up to 3 attempts in total.
 *
 * NOTE(review): BACKTRACK is defined outside this chunk; the otherwise
 * unused "restart" label suggests the macro jumps to "restart"/"out" and
 * uses the locals "fn" and "rt" -- confirm before renaming anything here.
 */
917 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
918                                       struct flowi6 *fl6, int flags)
919 {
920         struct fib6_node *fn;
921         struct rt6_info *rt, *nrt;
922         int strict = 0;
            /* Bounds the relookup loop taken after a failed insert. */
923         int attempts = 3;
924         int err;
            /* Reachability is only demanded when forwarding is off. */
925         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
926
927         strict |= flags & RT6_LOOKUP_F_IFACE;
928
929 relookup:
930         read_lock_bh(&table->tb6_lock);
931
932 restart_2:
933         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
934
935 restart:
936         rt = rt6_select(fn, oif, strict | reachable);
937         if (rt->rt6i_nsiblings)
938                 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
939         BACKTRACK(net, &fl6->saddr);
            /* Null entry or an existing cache entry: nothing to copy. */
940         if (rt == net->ipv6.ip6_null_entry ||
941             rt->rt6i_flags & RTF_CACHE)
942                 goto out;
943
            /* Pin the route, then drop the lock before allocating a copy. */
944         dst_hold(&rt->dst);
945         read_unlock_bh(&table->tb6_lock);
946
            /*
             * No RTF_NONEXTHOP/RTF_GATEWAY: copy via rt6_alloc_cow().
             * Otherwise, a non-DST_HOST route is copied via rt6_alloc_clone().
             * Otherwise the route is returned as it is (reference held above).
             */
947         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
948                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
949         else if (!(rt->dst.flags & DST_HOST))
950                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
951         else
952                 goto out2;
953
            /* Swap the original for the copy (or the null entry if allocation failed). */
954         ip6_rt_put(rt);
955         rt = nrt ? : net->ipv6.ip6_null_entry;
956
957         dst_hold(&rt->dst);
958         if (nrt) {
959                 err = ip6_ins_rt(nrt);
960                 if (!err)
961                         goto out2;
962         }
963
            /* Out of attempts: return what we have, still holding the reference. */
964         if (--attempts <= 0)
965                 goto out2;
966
967         /*
968          * Race condition! In the gap, when table->tb6_lock was
969          * released someone could insert this route.  Relookup.
970          */
971         ip6_rt_put(rt);
972         goto relookup;
973
974 out:
            /*
             * Reached with the read lock still held.  If this pass demanded
             * reachability, clear that requirement and redo the lookup once.
             */
975         if (reachable) {
976                 reachable = 0;
977                 goto restart_2;
978         }
979         dst_hold(&rt->dst);
980         read_unlock_bh(&table->tb6_lock);
981 out2:
            /* Usage accounting on the route being returned. */
982         rt->dst.lastuse = jiffies;
983         rt->dst.__use++;
984
985         return rt;
986 }
987
988 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
989                                             struct flowi6 *fl6, int flags)
990 {
991         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
992 }
993
994 static struct dst_entry *ip6_route_input_lookup(struct net *net,
995                                                 struct net_device *dev,
996                                                 struct flowi6 *fl6, int flags)
997 {
998         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
999                 flags |= RT6_LOOKUP_F_IFACE;
1000
1001         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1002 }
1003
1004 void ip6_route_input(struct sk_buff *skb)
1005 {
1006         const struct ipv6hdr *iph = ipv6_hdr(skb);
1007         struct net *net = dev_net(skb->dev);
1008         int flags = RT6_LOOKUP_F_HAS_SADDR;
1009         struct flowi6 fl6 = {
1010                 .flowi6_iif = skb->dev->ifindex,
1011                 .daddr = iph->daddr,
1012                 .saddr = iph->saddr,
1013                 .flowlabel = ip6_flowinfo(iph),
1014                 .flowi6_mark = skb->mark,
1015                 .flowi6_proto = iph->nexthdr,
1016         };
1017
1018         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1019 }
1020
1021 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1022                                              struct flowi6 *fl6, int flags)
1023 {
1024         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1025 }
1026
1027 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1028                                     struct flowi6 *fl6)
1029 {
1030         int flags = 0;
1031
1032         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1033
1034         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1035                 flags |= RT6_LOOKUP_F_IFACE;
1036
1037         if (!ipv6_addr_any(&fl6->saddr))
1038                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1039         else if (sk)
1040                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1041
1042         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1043 }
1044
1045 EXPORT_SYMBOL(ip6_route_output);
1046
1047 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1048 {
1049         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1050         struct dst_entry *new = NULL;
1051
1052         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1053         if (rt) {
1054                 new = &rt->dst;
1055
1056                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1057                 rt6_init_peer(rt, net->ipv6.peers);
1058
1059                 new->__use = 1;
1060                 new->input = dst_discard;
1061                 new->output = dst_discard;
1062
1063                 if (dst_metrics_read_only(&ort->dst))
1064                         new->_metrics = ort->dst._metrics;
1065                 else
1066                         dst_copy_metrics(new, &ort->dst);
1067                 rt->rt6i_idev = ort->rt6i_idev;
1068                 if (rt->rt6i_idev)
1069                         in6_dev_hold(rt->rt6i_idev);
1070
1071                 rt->rt6i_gateway = ort->rt6i_gateway;
1072                 rt->rt6i_flags = ort->rt6i_flags;
1073                 rt->rt6i_metric = 0;
1074
1075                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1076 #ifdef CONFIG_IPV6_SUBTREES
1077                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1078 #endif
1079
1080                 dst_free(new);
1081         }
1082
1083         dst_release(dst_orig);
1084         return new ? new : ERR_PTR(-ENOMEM);
1085 }
1086
1087 /*
1088  *      Destination cache support functions
1089  */
1090
1091 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1092 {
1093         struct rt6_info *rt;
1094
1095         rt = (struct rt6_info *) dst;
1096
1097         /* All IPV6 dsts are created with ->obsolete set to the value
1098          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1099          * into this function always.
1100          */
1101         if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1102                 return NULL;
1103
1104         if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1105                 return NULL;
1106
1107         if (rt6_check_expired(rt))
1108                 return NULL;
1109
1110         return dst;
1111 }
1112
1113 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1114 {
1115         struct rt6_info *rt = (struct rt6_info *) dst;
1116
1117         if (rt) {
1118                 if (rt->rt6i_flags & RTF_CACHE) {
1119                         if (rt6_check_expired(rt)) {
1120                                 ip6_del_rt(rt);
1121                                 dst = NULL;
1122                         }
1123                 } else {
1124                         dst_release(dst);
1125                         dst = NULL;
1126                 }
1127         }
1128         return dst;
1129 }
1130
1131 static void ip6_link_failure(struct sk_buff *skb)
1132 {
1133         struct rt6_info *rt;
1134
1135         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1136
1137         rt = (struct rt6_info *) skb_dst(skb);
1138         if (rt) {
1139                 if (rt->rt6i_flags & RTF_CACHE) {
1140                         dst_hold(&rt->dst);
1141                         if (ip6_del_rt(rt))
1142                                 dst_free(&rt->dst);
1143                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1144                         rt->rt6i_node->fn_sernum = -1;
1145                 }
1146         }
1147 }
1148
1149 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1150                                struct sk_buff *skb, u32 mtu)
1151 {
1152         struct rt6_info *rt6 = (struct rt6_info*)dst;
1153
1154         dst_confirm(dst);
1155         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1156                 struct net *net = dev_net(dst->dev);
1157
1158                 rt6->rt6i_flags |= RTF_MODIFIED;
1159                 if (mtu < IPV6_MIN_MTU) {
1160                         u32 features = dst_metric(dst, RTAX_FEATURES);
1161                         mtu = IPV6_MIN_MTU;
1162                         features |= RTAX_FEATURE_ALLFRAG;
1163                         dst_metric_set(dst, RTAX_FEATURES, features);
1164                 }
1165                 dst_metric_set(dst, RTAX_MTU, mtu);
1166                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1167         }
1168 }
1169
1170 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1171                      int oif, u32 mark)
1172 {
1173         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1174         struct dst_entry *dst;
1175         struct flowi6 fl6;
1176
1177         memset(&fl6, 0, sizeof(fl6));
1178         fl6.flowi6_oif = oif;
1179         fl6.flowi6_mark = mark;
1180         fl6.daddr = iph->daddr;
1181         fl6.saddr = iph->saddr;
1182         fl6.flowlabel = ip6_flowinfo(iph);
1183
1184         dst = ip6_route_output(net, NULL, &fl6);
1185         if (!dst->error)
1186                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1187         dst_release(dst);
1188 }
1189 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1190
1191 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1192 {
1193         ip6_update_pmtu(skb, sock_net(sk), mtu,
1194                         sk->sk_bound_dev_if, sk->sk_mark);
1195 }
1196 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1197
1198 /* Handle redirects */
/*
 * Lookup key for the redirect path: a flow key plus the address of the
 * router the redirect came from.
 *
 * ip6_route_redirect() passes &rdfl.fl6 to fib6_rule_lookup(), and
 * __ip6_route_redirect() casts that struct flowi6 pointer back to a
 * struct ip6rd_flowi pointer, so fl6 has to stay the first member.
 */
1199 struct ip6rd_flowi {
1200         struct flowi6 fl6;
1201         struct in6_addr gateway;
1202 };
1203
1204 static struct rt6_info *__ip6_route_redirect(struct net *net,
1205                                              struct fib6_table *table,
1206                                              struct flowi6 *fl6,
1207                                              int flags)
1208 {
1209         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1210         struct rt6_info *rt;
1211         struct fib6_node *fn;
1212
1213         /* Get the "current" route for this destination and
1214          * check if the redirect has come from approriate router.
1215          *
1216          * RFC 4861 specifies that redirects should only be
1217          * accepted if they come from the nexthop to the target.
1218          * Due to the way the routes are chosen, this notion
1219          * is a bit fuzzy and one might need to check all possible
1220          * routes.
1221          */
1222
1223         read_lock_bh(&table->tb6_lock);
1224         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1225 restart:
1226         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1227                 if (rt6_check_expired(rt))
1228                         continue;
1229                 if (rt->dst.error)
1230                         break;
1231                 if (!(rt->rt6i_flags & RTF_GATEWAY))
1232                         continue;
1233                 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1234                         continue;
1235                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1236                         continue;
1237                 break;
1238         }
1239
1240         if (!rt)
1241                 rt = net->ipv6.ip6_null_entry;
1242         else if (rt->dst.error) {
1243                 rt = net->ipv6.ip6_null_entry;
1244                 goto out;
1245         }
1246         BACKTRACK(net, &fl6->saddr);
1247 out:
1248         dst_hold(&rt->dst);
1249
1250         read_unlock_bh(&table->tb6_lock);
1251
1252         return rt;
1253 };
1254
1255 static struct dst_entry *ip6_route_redirect(struct net *net,
1256                                         const struct flowi6 *fl6,
1257                                         const struct in6_addr *gateway)
1258 {
1259         int flags = RT6_LOOKUP_F_HAS_SADDR;
1260         struct ip6rd_flowi rdfl;
1261
1262         rdfl.fl6 = *fl6;
1263         rdfl.gateway = *gateway;
1264
1265         return fib6_rule_lookup(net, &rdfl.fl6,
1266                                 flags, __ip6_route_redirect);
1267 }
1268
1269 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1270 {
1271         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1272         struct dst_entry *dst;
1273         struct flowi6 fl6;
1274
1275         memset(&fl6, 0, sizeof(fl6));
1276         fl6.flowi6_oif = oif;
1277         fl6.flowi6_mark = mark;
1278         fl6.daddr = iph->daddr;
1279         fl6.saddr = iph->saddr;
1280         fl6.flowlabel = ip6_flowinfo(iph);
1281
1282         dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1283         rt6_do_redirect(dst, NULL, skb);
1284         dst_release(dst);
1285 }
1286 EXPORT_SYMBOL_GPL(ip6_redirect);
1287
1288 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1289                             u32 mark)
1290 {
1291         const struct ipv6hdr *iph = ipv6_hdr(skb);
1292         const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1293         struct dst_entry *dst;
1294         struct flowi6 fl6;
1295
1296         memset(&fl6, 0, sizeof(fl6));
1297         fl6.flowi6_oif = oif;
1298         fl6.flowi6_mark = mark;
1299         fl6.daddr = msg->dest;
1300         fl6.saddr = iph->daddr;
1301
1302         dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1303         rt6_do_redirect(dst, NULL, skb);
1304         dst_release(dst);
1305 }
1306
1307 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1308 {
1309         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1310 }
1311 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1312
1313 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1314 {
1315         struct net_device *dev = dst->dev;
1316         unsigned int mtu = dst_mtu(dst);
1317         struct net *net = dev_net(dev);
1318
1319         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1320
1321         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1322                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1323
1324         /*
1325          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1326          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1327          * IPV6_MAXPLEN is also valid and means: "any MSS,
1328          * rely only on pmtu discovery"
1329          */
1330         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1331                 mtu = IPV6_MAXPLEN;
1332         return mtu;
1333 }
1334
1335 static unsigned int ip6_mtu(const struct dst_entry *dst)
1336 {
1337         struct inet6_dev *idev;
1338         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1339
1340         if (mtu)
1341                 return mtu;
1342
1343         mtu = IPV6_MIN_MTU;
1344
1345         rcu_read_lock();
1346         idev = __in6_dev_get(dst->dev);
1347         if (idev)
1348                 mtu = idev->cnf.mtu6;
1349         rcu_read_unlock();
1350
1351         return mtu;
1352 }
1353
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * walk and prune it.  Every access in this file is made with
 * icmp6_dst_lock held via spin_lock_bh().
 */
1354 static struct dst_entry *icmp6_dst_gc_list;
1355 static DEFINE_SPINLOCK(icmp6_dst_lock);
1356
1357 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1358                                   struct flowi6 *fl6)
1359 {
1360         struct dst_entry *dst;
1361         struct rt6_info *rt;
1362         struct inet6_dev *idev = in6_dev_get(dev);
1363         struct net *net = dev_net(dev);
1364
1365         if (unlikely(!idev))
1366                 return ERR_PTR(-ENODEV);
1367
1368         rt = ip6_dst_alloc(net, dev, 0, NULL);
1369         if (unlikely(!rt)) {
1370                 in6_dev_put(idev);
1371                 dst = ERR_PTR(-ENOMEM);
1372                 goto out;
1373         }
1374
1375         rt->dst.flags |= DST_HOST;
1376         rt->dst.output  = ip6_output;
1377         atomic_set(&rt->dst.__refcnt, 1);
1378         rt->rt6i_gateway  = fl6->daddr;
1379         rt->rt6i_dst.addr = fl6->daddr;
1380         rt->rt6i_dst.plen = 128;
1381         rt->rt6i_idev     = idev;
1382         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1383
1384         spin_lock_bh(&icmp6_dst_lock);
1385         rt->dst.next = icmp6_dst_gc_list;
1386         icmp6_dst_gc_list = &rt->dst;
1387         spin_unlock_bh(&icmp6_dst_lock);
1388
1389         fib6_force_start_gc(net);
1390
1391         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1392
1393 out:
1394         return dst;
1395 }
1396
1397 int icmp6_dst_gc(void)
1398 {
1399         struct dst_entry *dst, **pprev;
1400         int more = 0;
1401
1402         spin_lock_bh(&icmp6_dst_lock);
1403         pprev = &icmp6_dst_gc_list;
1404
1405         while ((dst = *pprev) != NULL) {
1406                 if (!atomic_read(&dst->__refcnt)) {
1407                         *pprev = dst->next;
1408                         dst_free(dst);
1409                 } else {
1410                         pprev = &dst->next;
1411                         ++more;
1412                 }
1413         }
1414
1415         spin_unlock_bh(&icmp6_dst_lock);
1416
1417         return more;
1418 }
1419
1420 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1421                             void *arg)
1422 {
1423         struct dst_entry *dst, **pprev;
1424
1425         spin_lock_bh(&icmp6_dst_lock);
1426         pprev = &icmp6_dst_gc_list;
1427         while ((dst = *pprev) != NULL) {
1428                 struct rt6_info *rt = (struct rt6_info *) dst;
1429                 if (func(rt, arg)) {
1430                         *pprev = dst->next;
1431                         dst_free(dst);
1432                 } else {
1433                         pprev = &dst->next;
1434                 }
1435         }
1436         spin_unlock_bh(&icmp6_dst_lock);
1437 }
1438
1439 static int ip6_dst_gc(struct dst_ops *ops)
1440 {
1441         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1442         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1443         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1444         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1445         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1446         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1447         int entries;
1448
1449         entries = dst_entries_get_fast(ops);
1450         if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1451             entries <= rt_max_size)
1452                 goto out;
1453
1454         net->ipv6.ip6_rt_gc_expire++;
1455         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1456         entries = dst_entries_get_slow(ops);
1457         if (entries < ops->gc_thresh)
1458                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1459 out:
1460         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1461         return entries > rt_max_size;
1462 }
1463
1464 /*
1465  *
1466  */
1467
/*
 * Create a route from the configuration @cfg and insert it into its table.
 *
 * Steps, in order:
 *  - reject prefix lengths above 128 (and any source prefix when
 *    CONFIG_IPV6_SUBTREES is off) with -EINVAL;
 *  - if fc_ifindex is set, take references on the device and its
 *    inet6_dev (-ENODEV if either is missing);
 *  - pick the table (-ENOBUFS if none) and allocate the route (-ENOMEM);
 *  - fill in expiry, protocol, input/output handlers, destination and
 *    source keys and the metric;
 *  - reject routes (RTF_REJECT, or a non-local, non-loopback-destination
 *    route via a loopback device) are bound to the loopback device and
 *    jump straight to installation;
 *  - gateway routes have their nexthop validated;
 *  - a preferred source address, if given, must pass ipv6_chk_addr().
 *
 * @cfg is modified: fc_metric and fc_protocol receive defaults when zero /
 * RTPROT_UNSPEC, and fc_nlinfo.nl_net is set to the device's namespace.
 *
 * Returns the result of __ip6_ins_rt() on the install path.  On the "out"
 * error path the device and inet6_dev references taken so far are dropped,
 * the route (if allocated) is passed to dst_free(), and the negative errno
 * is returned.
 */
1468 int ip6_route_add(struct fib6_config *cfg)
1469 {
1470         int err;
1471         struct net *net = cfg->fc_nlinfo.nl_net;
1472         struct rt6_info *rt = NULL;
1473         struct net_device *dev = NULL;
1474         struct inet6_dev *idev = NULL;
1475         struct fib6_table *table;
1476         int addr_type;
1477
1478         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1479                 return -EINVAL;
1480 #ifndef CONFIG_IPV6_SUBTREES
1481         if (cfg->fc_src_len)
1482                 return -EINVAL;
1483 #endif
1484         if (cfg->fc_ifindex) {
1485                 err = -ENODEV;
1486                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1487                 if (!dev)
1488                         goto out;
1489                 idev = in6_dev_get(dev);
1490                 if (!idev)
1491                         goto out;
1492         }
1493
1494         if (cfg->fc_metric == 0)
1495                 cfg->fc_metric = IP6_RT_PRIO_USER;
1496
             /*
              * A netlink request without NLM_F_CREATE first tries the
              * existing table; if there is none, a warning is logged and the
              * table is created anyway.
              */
1497         err = -ENOBUFS;
1498         if (cfg->fc_nlinfo.nlh &&
1499             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1500                 table = fib6_get_table(net, cfg->fc_table);
1501                 if (!table) {
1502                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1503                         table = fib6_new_table(net, cfg->fc_table);
1504                 }
1505         } else {
1506                 table = fib6_new_table(net, cfg->fc_table);
1507         }
1508
1509         if (!table)
1510                 goto out;
1511
             /* DST_NOCOUNT is passed for every route that is not RTF_ADDRCONF. */
1512         rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1513
1514         if (!rt) {
1515                 err = -ENOMEM;
1516                 goto out;
1517         }
1518
1519         if (cfg->fc_flags & RTF_EXPIRES)
1520                 rt6_set_expires(rt, jiffies +
1521                                 clock_t_to_jiffies(cfg->fc_expires));
1522         else
1523                 rt6_clean_expires(rt);
1524
1525         if (cfg->fc_protocol == RTPROT_UNSPEC)
1526                 cfg->fc_protocol = RTPROT_BOOT;
1527         rt->rt6i_protocol = cfg->fc_protocol;
1528
1529         addr_type = ipv6_addr_type(&cfg->fc_dst);
1530
             /* Input handler: multicast, local delivery, or forwarding. */
1531         if (addr_type & IPV6_ADDR_MULTICAST)
1532                 rt->dst.input = ip6_mc_input;
1533         else if (cfg->fc_flags & RTF_LOCAL)
1534                 rt->dst.input = ip6_input;
1535         else
1536                 rt->dst.input = ip6_forward;
1537
1538         rt->dst.output = ip6_output;
1539
1540         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1541         rt->rt6i_dst.plen = cfg->fc_dst_len;
1542         if (rt->rt6i_dst.plen == 128) {
1543                 rt->dst.flags |= DST_HOST;
1544                 dst_metrics_set_force_overwrite(&rt->dst);
1545         }
1546
1547 #ifdef CONFIG_IPV6_SUBTREES
1548         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1549         rt->rt6i_src.plen = cfg->fc_src_len;
1550 #endif
1551
1552         rt->rt6i_metric = cfg->fc_metric;
1553
1554         /* We cannot add true routes via loopback here,
1555            they would result in kernel looping; promote them to reject routes
1556          */
1557         if ((cfg->fc_flags & RTF_REJECT) ||
1558             (dev && (dev->flags & IFF_LOOPBACK) &&
1559              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1560              !(cfg->fc_flags & RTF_LOCAL))) {
1561                 /* hold loopback dev/idev if we haven't done so. */
1562                 if (dev != net->loopback_dev) {
1563                         if (dev) {
1564                                 dev_put(dev);
1565                                 in6_dev_put(idev);
1566                         }
1567                         dev = net->loopback_dev;
1568                         dev_hold(dev);
1569                         idev = in6_dev_get(dev);
1570                         if (!idev) {
1571                                 err = -ENODEV;
1572                                 goto out;
1573                         }
1574                 }
1575                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type picks the error code and the discard handlers. */
1576                 switch (cfg->fc_type) {
1577                 case RTN_BLACKHOLE:
1578                         rt->dst.error = -EINVAL;
1579                         rt->dst.output = dst_discard;
1580                         rt->dst.input = dst_discard;
1581                         break;
1582                 case RTN_PROHIBIT:
1583                         rt->dst.error = -EACCES;
1584                         rt->dst.output = ip6_pkt_prohibit_out;
1585                         rt->dst.input = ip6_pkt_prohibit;
1586                         break;
1587                 case RTN_THROW:
1588                 default:
1589                         rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1590                                         : -ENETUNREACH;
1591                         rt->dst.output = ip6_pkt_discard_out;
1592                         rt->dst.input = ip6_pkt_discard;
1593                         break;
1594                 }
1595                 goto install_route;
1596         }
1597
1598         if (cfg->fc_flags & RTF_GATEWAY) {
1599                 const struct in6_addr *gw_addr;
1600                 int gwa_type;
1601
1602                 gw_addr = &cfg->fc_gateway;
1603                 rt->rt6i_gateway = *gw_addr;
1604                 gwa_type = ipv6_addr_type(gw_addr);
1605
1606                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1607                         struct rt6_info *grt;
1608
1609                         /* IPv6 strictly inhibits using not link-local
1610                            addresses as nexthop address.
1611                            Otherwise, router will not able to send redirects.
1612                            It is very good, but in some (rare!) circumstances
1613                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1614                            some exceptions. --ANK
1615                          */
1616                         err = -EINVAL;
1617                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1618                                 goto out;
1619
1620                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1621
1622                         err = -EHOSTUNREACH;
1623                         if (!grt)
1624                                 goto out;
1625                         if (dev) {
1626                                 if (dev != grt->dst.dev) {
1627                                         ip6_rt_put(grt);
1628                                         goto out;
1629                                 }
1630                         } else {
                                     /* No device given: adopt the one the gateway resolves through. */
1631                                 dev = grt->dst.dev;
1632                                 idev = grt->rt6i_idev;
1633                                 dev_hold(dev);
1634                                 in6_dev_hold(grt->rt6i_idev);
1635                         }
                             /* Accepted only if the gateway's own route is not via another gateway. */
1636                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1637                                 err = 0;
1638                         ip6_rt_put(grt);
1639
1640                         if (err)
1641                                 goto out;
1642                 }
1643                 err = -EINVAL;
1644                 if (!dev || (dev->flags & IFF_LOOPBACK))
1645                         goto out;
1646         }
1647
1648         err = -ENODEV;
1649         if (!dev)
1650                 goto out;
1651
1652         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1653                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1654                         err = -EINVAL;
1655                         goto out;
1656                 }
1657                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1658                 rt->rt6i_prefsrc.plen = 128;
1659         } else
1660                 rt->rt6i_prefsrc.plen = 0;
1661
1662         rt->rt6i_flags = cfg->fc_flags;
1663
1664 install_route:
             /* From here on the dev/idev references are stored in the route. */
1665         rt->dst.dev = dev;
1666         rt->rt6i_idev = idev;
1667         rt->rt6i_table = table;
1668
1669         cfg->fc_nlinfo.nl_net = dev_net(dev);
1670
1671         return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);
1672
1673 out:
             /* Error path: undo whatever was acquired before the failure. */
1674         if (dev)
1675                 dev_put(dev);
1676         if (idev)
1677                 in6_dev_put(idev);
1678         if (rt)
1679                 dst_free(&rt->dst);
1680         return err;
1681 }
1682
1683 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1684 {
1685         int err;
1686         struct fib6_table *table;
1687         struct net *net = dev_net(rt->dst.dev);
1688
1689         if (rt == net->ipv6.ip6_null_entry) {
1690                 err = -ENOENT;
1691                 goto out;
1692         }
1693
1694         table = rt->rt6i_table;
1695         write_lock_bh(&table->tb6_lock);
1696         err = fib6_del(rt, info);
1697         write_unlock_bh(&table->tb6_lock);
1698
1699 out:
1700         ip6_rt_put(rt);
1701         return err;
1702 }
1703
1704 int ip6_del_rt(struct rt6_info *rt)
1705 {
1706         struct nl_info info = {
1707                 .nl_net = dev_net(rt->dst.dev),
1708         };
1709         return __ip6_del_rt(rt, &info);
1710 }
1711
1712 static int ip6_route_del(struct fib6_config *cfg)
1713 {
1714         struct fib6_table *table;
1715         struct fib6_node *fn;
1716         struct rt6_info *rt;
1717         int err = -ESRCH;
1718
1719         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1720         if (!table)
1721                 return err;
1722
1723         read_lock_bh(&table->tb6_lock);
1724
1725         fn = fib6_locate(&table->tb6_root,
1726                          &cfg->fc_dst, cfg->fc_dst_len,
1727                          &cfg->fc_src, cfg->fc_src_len);
1728
1729         if (fn) {
1730                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1731                         if (cfg->fc_ifindex &&
1732                             (!rt->dst.dev ||
1733                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1734                                 continue;
1735                         if (cfg->fc_flags & RTF_GATEWAY &&
1736                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1737                                 continue;
1738                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1739                                 continue;
1740                         dst_hold(&rt->dst);
1741                         read_unlock_bh(&table->tb6_lock);
1742
1743                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1744                 }
1745         }
1746         read_unlock_bh(&table->tb6_lock);
1747
1748         return err;
1749 }
1750
/*
 * Process a received ICMPv6 Redirect carried in @skb for the route @dst.
 *
 * After validating the message, the neighbour entry for the redirect
 * target is updated, a host (/128) RTF_CACHE|RTF_DYNAMIC route to the
 * redirected destination is inserted, NETEVENT_REDIRECT listeners are
 * notified, and the old route is deleted if it was itself a cached one.
 *
 * @sk is not used by this function.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
        struct net *net = dev_net(skb->dev);
        struct netevent_redirect netevent;
        struct rt6_info *rt, *nrt = NULL;
        struct ndisc_options ndopts;
        struct inet6_dev *in6_dev;
        struct neighbour *neigh;
        struct rd_msg *msg;
        int optlen, on_link;
        u8 *lladdr;

        /* Number of option bytes that follow the fixed redirect header. */
        optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
        optlen -= sizeof(*msg);

        if (optlen < 0) {
                net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
                return;
        }

        msg = (struct rd_msg *)icmp6_hdr(skb);

        if (ipv6_addr_is_multicast(&msg->dest)) {
                net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
                return;
        }

        /*
         * Target == destination means the destination itself is on-link;
         * otherwise the target must be a link-local unicast address.
         */
        on_link = 0;
        if (ipv6_addr_equal(&msg->dest, &msg->target)) {
                on_link = 1;
        } else if (ipv6_addr_type(&msg->target) !=
                   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
                net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
                return;
        }

        in6_dev = __in6_dev_get(skb->dev);
        if (!in6_dev)
                return;
        /* Ignore redirects when forwarding or when they are disabled. */
        if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
                return;

        /* RFC2461 8.1:
         *      The IP source address of the Redirect MUST be the same as the current
         *      first-hop router for the specified ICMP Destination Address.
         */

        if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
                net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
                return;
        }

        /* Optional target link-layer address; stays NULL when absent. */
        lladdr = NULL;
        if (ndopts.nd_opts_tgt_lladdr) {
                lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
                                             skb->dev);
                if (!lladdr) {
                        net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
                        return;
                }
        }

        rt = (struct rt6_info *) dst;
        if (rt == net->ipv6.ip6_null_entry) {
                net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
                return;
        }

        /* Redirect received -> path was valid.
         * Look, redirects are sent only in response to data packets,
         * so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->dst);

        /*
         * NOTE(review): the final argument 1 presumably asks for the entry
         * to be created when missing -- confirm against __neigh_lookup().
         * The reference obtained here is dropped at "out".
         */
        neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
        if (!neigh)
                return;

        /*
         *      We have finally decided to accept it.
         */

        /* The router flags are only requested when the target is not on-link. */
        neigh_update(neigh, lladdr, NUD_STALE,
                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
                     NEIGH_UPDATE_F_OVERRIDE|
                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
                                     NEIGH_UPDATE_F_ISROUTER))
                     );

        /* Host route to the redirected destination, cloned from @rt. */
        nrt = ip6_rt_copy(rt, &msg->dest);
        if (!nrt)
                goto out;

        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
        if (on_link)
                nrt->rt6i_flags &= ~RTF_GATEWAY;

        /* The neighbour's key (the redirect target) becomes the next hop. */
        nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

        if (ip6_ins_rt(nrt))
                goto out;

        netevent.old = &rt->dst;
        netevent.new = &nrt->dst;
        netevent.daddr = &msg->dest;
        netevent.neigh = neigh;
        call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

        /*
         * A cached old route is superseded by nrt and removed.  An extra
         * reference is taken first; presumably ip6_del_rt() drops one --
         * confirm against __ip6_del_rt().
         */
        if (rt->rt6i_flags & RTF_CACHE) {
                rt = (struct rt6_info *) dst_clone(&rt->dst);
                ip6_del_rt(rt);
        }

out:
        neigh_release(neigh);
}
1867
1868 /*
1869  *      Misc support functions
1870  */
1871
1872 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1873                                     const struct in6_addr *dest)
1874 {
1875         struct net *net = dev_net(ort->dst.dev);
1876         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1877                                             ort->rt6i_table);
1878
1879         if (rt) {
1880                 rt->dst.input = ort->dst.input;
1881                 rt->dst.output = ort->dst.output;
1882                 rt->dst.flags |= DST_HOST;
1883
1884                 rt->rt6i_dst.addr = *dest;
1885                 rt->rt6i_dst.plen = 128;
1886                 dst_copy_metrics(&rt->dst, &ort->dst);
1887                 rt->dst.error = ort->dst.error;
1888                 rt->rt6i_idev = ort->rt6i_idev;
1889                 if (rt->rt6i_idev)
1890                         in6_dev_hold(rt->rt6i_idev);
1891                 rt->dst.lastuse = jiffies;
1892
1893                 if (ort->rt6i_flags & RTF_GATEWAY)
1894                         rt->rt6i_gateway = ort->rt6i_gateway;
1895                 else
1896                         rt->rt6i_gateway = *dest;
1897                 rt->rt6i_flags = ort->rt6i_flags;
1898                 rt6_set_from(rt, ort);
1899                 rt->rt6i_metric = 0;
1900
1901 #ifdef CONFIG_IPV6_SUBTREES
1902                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1903 #endif
1904                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1905                 rt->rt6i_table = ort->rt6i_table;
1906         }
1907         return rt;
1908 }
1909
1910 #ifdef CONFIG_IPV6_ROUTE_INFO
1911 static struct rt6_info *rt6_get_route_info(struct net *net,
1912                                            const struct in6_addr *prefix, int prefixlen,
1913                                            const struct in6_addr *gwaddr, int ifindex)
1914 {
1915         struct fib6_node *fn;
1916         struct rt6_info *rt = NULL;
1917         struct fib6_table *table;
1918
1919         table = fib6_get_table(net, RT6_TABLE_INFO);
1920         if (!table)
1921                 return NULL;
1922
1923         read_lock_bh(&table->tb6_lock);
1924         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1925         if (!fn)
1926                 goto out;
1927
1928         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1929                 if (rt->dst.dev->ifindex != ifindex)
1930                         continue;
1931                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1932                         continue;
1933                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1934                         continue;
1935                 dst_hold(&rt->dst);
1936                 break;
1937         }
1938 out:
1939         read_unlock_bh(&table->tb6_lock);
1940         return rt;
1941 }
1942
1943 static struct rt6_info *rt6_add_route_info(struct net *net,
1944                                            const struct in6_addr *prefix, int prefixlen,
1945                                            const struct in6_addr *gwaddr, int ifindex,
1946                                            unsigned int pref)
1947 {
1948         struct fib6_config cfg = {
1949                 .fc_table       = RT6_TABLE_INFO,
1950                 .fc_metric      = IP6_RT_PRIO_USER,
1951                 .fc_ifindex     = ifindex,
1952                 .fc_dst_len     = prefixlen,
1953                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1954                                   RTF_UP | RTF_PREF(pref),
1955                 .fc_nlinfo.portid = 0,
1956                 .fc_nlinfo.nlh = NULL,
1957                 .fc_nlinfo.nl_net = net,
1958         };
1959
1960         cfg.fc_dst = *prefix;
1961         cfg.fc_gateway = *gwaddr;
1962
1963         /* We should treat it as a default route if prefix length is 0. */
1964         if (!prefixlen)
1965                 cfg.fc_flags |= RTF_DEFAULT;
1966
1967         ip6_route_add(&cfg);
1968
1969         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1970 }
1971 #endif
1972
1973 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1974 {
1975         struct rt6_info *rt;
1976         struct fib6_table *table;
1977
1978         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1979         if (!table)
1980                 return NULL;
1981
1982         read_lock_bh(&table->tb6_lock);
1983         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1984                 if (dev == rt->dst.dev &&
1985                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1986                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1987                         break;
1988         }
1989         if (rt)
1990                 dst_hold(&rt->dst);
1991         read_unlock_bh(&table->tb6_lock);
1992         return rt;
1993 }
1994
1995 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1996                                      struct net_device *dev,
1997                                      unsigned int pref)
1998 {
1999         struct fib6_config cfg = {
2000                 .fc_table       = RT6_TABLE_DFLT,
2001                 .fc_metric      = IP6_RT_PRIO_USER,
2002                 .fc_ifindex     = dev->ifindex,
2003                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2004                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2005                 .fc_nlinfo.portid = 0,
2006                 .fc_nlinfo.nlh = NULL,
2007                 .fc_nlinfo.nl_net = dev_net(dev),
2008         };
2009
2010         cfg.fc_gateway = *gwaddr;
2011
2012         ip6_route_add(&cfg);
2013
2014         return rt6_get_dflt_router(gwaddr, dev);
2015 }
2016
2017 void rt6_purge_dflt_routers(struct net *net)
2018 {
2019         struct rt6_info *rt;
2020         struct fib6_table *table;
2021
2022         /* NOTE: Keep consistent with rt6_get_dflt_router */
2023         table = fib6_get_table(net, RT6_TABLE_DFLT);
2024         if (!table)
2025                 return;
2026
2027 restart:
2028         read_lock_bh(&table->tb6_lock);
2029         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2030                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2031                     (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2032                         dst_hold(&rt->dst);
2033                         read_unlock_bh(&table->tb6_lock);
2034                         ip6_del_rt(rt);
2035                         goto restart;
2036                 }
2037         }
2038         read_unlock_bh(&table->tb6_lock);
2039 }
2040
2041 static void rtmsg_to_fib6_config(struct net *net,
2042                                  struct in6_rtmsg *rtmsg,
2043                                  struct fib6_config *cfg)
2044 {
2045         memset(cfg, 0, sizeof(*cfg));
2046
2047         cfg->fc_table = RT6_TABLE_MAIN;
2048         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2049         cfg->fc_metric = rtmsg->rtmsg_metric;
2050         cfg->fc_expires = rtmsg->rtmsg_info;
2051         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2052         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2053         cfg->fc_flags = rtmsg->rtmsg_flags;
2054
2055         cfg->fc_nlinfo.nl_net = net;
2056
2057         cfg->fc_dst = rtmsg->rtmsg_dst;
2058         cfg->fc_src = rtmsg->rtmsg_src;
2059         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2060 }
2061
2062 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2063 {
2064         struct fib6_config cfg;
2065         struct in6_rtmsg rtmsg;
2066         int err;
2067
2068         switch(cmd) {
2069         case SIOCADDRT:         /* Add a route */
2070         case SIOCDELRT:         /* Delete a route */
2071                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2072                         return -EPERM;
2073                 err = copy_from_user(&rtmsg, arg,
2074                                      sizeof(struct in6_rtmsg));
2075                 if (err)
2076                         return -EFAULT;
2077
2078                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2079
2080                 rtnl_lock();
2081                 switch (cmd) {
2082                 case SIOCADDRT:
2083                         err = ip6_route_add(&cfg);
2084                         break;
2085                 case SIOCDELRT:
2086                         err = ip6_route_del(&cfg);
2087                         break;
2088                 default:
2089                         err = -EINVAL;
2090                 }
2091                 rtnl_unlock();
2092
2093                 return err;
2094         }
2095
2096         return -EINVAL;
2097 }
2098
2099 /*
2100  *      Drop the packet on the floor
2101  */
2102
2103 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2104 {
2105         int type;
2106         struct dst_entry *dst = skb_dst(skb);
2107         switch (ipstats_mib_noroutes) {
2108         case IPSTATS_MIB_INNOROUTES:
2109                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2110                 if (type == IPV6_ADDR_ANY) {
2111                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2112                                       IPSTATS_MIB_INADDRERRORS);
2113                         break;
2114                 }
2115                 /* FALLTHROUGH */
2116         case IPSTATS_MIB_OUTNOROUTES:
2117                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2118                               ipstats_mib_noroutes);
2119                 break;
2120         }
2121         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2122         kfree_skb(skb);
2123         return 0;
2124 }
2125
2126 static int ip6_pkt_discard(struct sk_buff *skb)
2127 {
2128         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2129 }
2130
2131 static int ip6_pkt_discard_out(struct sk_buff *skb)
2132 {
2133         skb->dev = skb_dst(skb)->dev;
2134         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2135 }
2136
2137 static int ip6_pkt_prohibit(struct sk_buff *skb)
2138 {
2139         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2140 }
2141
2142 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2143 {
2144         skb->dev = skb_dst(skb)->dev;
2145         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2146 }
2147
2148 /*
2149  *      Allocate a dst for local (unicast / anycast) address.
2150  */
2151
2152 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2153                                     const struct in6_addr *addr,
2154                                     bool anycast)
2155 {
2156         struct net *net = dev_net(idev->dev);
2157         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2158                                             DST_NOCOUNT, NULL);
2159         if (!rt)
2160                 return ERR_PTR(-ENOMEM);
2161
2162         in6_dev_hold(idev);
2163
2164         rt->dst.flags |= DST_HOST;
2165         rt->dst.input = ip6_input;
2166         rt->dst.output = ip6_output;
2167         rt->rt6i_idev = idev;
2168
2169         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2170         if (anycast)
2171                 rt->rt6i_flags |= RTF_ANYCAST;
2172         else
2173                 rt->rt6i_flags |= RTF_LOCAL;
2174
2175         rt->rt6i_gateway  = *addr;
2176         rt->rt6i_dst.addr = *addr;
2177         rt->rt6i_dst.plen = 128;
2178         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2179
2180         atomic_set(&rt->dst.__refcnt, 1);
2181
2182         return rt;
2183 }
2184
2185 int ip6_route_get_saddr(struct net *net,
2186                         struct rt6_info *rt,
2187                         const struct in6_addr *daddr,
2188                         unsigned int prefs,
2189                         struct in6_addr *saddr)
2190 {
2191         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2192         int err = 0;
2193         if (rt->rt6i_prefsrc.plen)
2194                 *saddr = rt->rt6i_prefsrc.addr;
2195         else
2196                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2197                                          daddr, prefs, saddr);
2198         return err;
2199 }
2200
2201 /* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
        struct net_device *dev;         /* device to match; NULL matches any */
        struct net *net;                /* namespace whose null entry is skipped */
        struct in6_addr *addr;          /* address compared with rt6i_prefsrc */
};
2207
2208 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2209 {
2210         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2211         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2212         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2213
2214         if (((void *)rt->dst.dev == dev || !dev) &&
2215             rt != net->ipv6.ip6_null_entry &&
2216             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2217                 /* remove prefsrc entry */
2218                 rt->rt6i_prefsrc.plen = 0;
2219         }
2220         return 0;
2221 }
2222
2223 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2224 {
2225         struct net *net = dev_net(ifp->idev->dev);
2226         struct arg_dev_net_ip adni = {
2227                 .dev = ifp->idev->dev,
2228                 .net = net,
2229                 .addr = &ifp->addr,
2230         };
2231         fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2232 }
2233
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
        struct net_device *dev;         /* device to match; NULL matches any */
        struct net *net;                /* namespace whose null entry is kept */
};
2238
2239 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2240 {
2241         const struct arg_dev_net *adn = arg;
2242         const struct net_device *dev = adn->dev;
2243
2244         if ((rt->dst.dev == dev || !dev) &&
2245             rt != adn->net->ipv6.ip6_null_entry)
2246                 return -1;
2247
2248         return 0;
2249 }
2250
2251 void rt6_ifdown(struct net *net, struct net_device *dev)
2252 {
2253         struct arg_dev_net adn = {
2254                 .dev = dev,
2255                 .net = net,
2256         };
2257
2258         fib6_clean_all(net, fib6_ifdown, &adn);
2259         icmp6_clean_all(fib6_ifdown, &adn);
2260 }
2261
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
        struct net_device *dev;         /* device whose MTU changed */
        unsigned int mtu;               /* the new MTU value */
};
2266
2267 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2268 {
2269         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2270         struct inet6_dev *idev;
2271
2272         /* In IPv6 pmtu discovery is not optional,
2273            so that RTAX_MTU lock cannot disable it.
2274            We still use this lock to block changes
2275            caused by addrconf/ndisc.
2276         */
2277
2278         idev = __in6_dev_get(arg->dev);
2279         if (!idev)
2280                 return 0;
2281
2282         /* For administrative MTU increase, there is no way to discover
2283            IPv6 PMTU increase, so PMTU increase should be updated here.
2284            Since RFC 1981 doesn't include administrative MTU increase
2285            update PMTU increase is a MUST. (i.e. jumbo frame)
2286          */
2287         /*
2288            If new MTU is less than route PMTU, this new MTU will be the
2289            lowest MTU in the path, update the route PMTU to reflect PMTU
2290            decreases; if new MTU is greater than route PMTU, and the
2291            old MTU is the lowest MTU in the path, update the route PMTU
2292            to reflect the increase. In this case if the other nodes' MTU
2293            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2294            PMTU discouvery.
2295          */
2296         if (rt->dst.dev == arg->dev &&
2297             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2298             (dst_mtu(&rt->dst) >= arg->mtu ||
2299              (dst_mtu(&rt->dst) < arg->mtu &&
2300               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2301                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2302         }
2303         return 0;
2304 }
2305
2306 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2307 {
2308         struct rt6_mtu_change_arg arg = {
2309                 .dev = dev,
2310                 .mtu = mtu,
2311         };
2312
2313         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2314 }
2315
2316 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2317         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2318         [RTA_OIF]               = { .type = NLA_U32 },
2319         [RTA_IIF]               = { .type = NLA_U32 },
2320         [RTA_PRIORITY]          = { .type = NLA_U32 },
2321         [RTA_METRICS]           = { .type = NLA_NESTED },
2322         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2323 };
2324
2325 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2326                               struct fib6_config *cfg)
2327 {
2328         struct rtmsg *rtm;
2329         struct nlattr *tb[RTA_MAX+1];
2330         int err;
2331
2332         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2333         if (err < 0)
2334                 goto errout;
2335
2336         err = -EINVAL;
2337         rtm = nlmsg_data(nlh);
2338         memset(cfg, 0, sizeof(*cfg));
2339
2340         cfg->fc_table = rtm->rtm_table;
2341         cfg->fc_dst_len = rtm->rtm_dst_len;
2342         cfg->fc_src_len = rtm->rtm_src_len;
2343         cfg->fc_flags = RTF_UP;
2344         cfg->fc_protocol = rtm->rtm_protocol;
2345         cfg->fc_type = rtm->rtm_type;
2346
2347         if (rtm->rtm_type == RTN_UNREACHABLE ||
2348             rtm->rtm_type == RTN_BLACKHOLE ||
2349             rtm->rtm_type == RTN_PROHIBIT ||
2350             rtm->rtm_type == RTN_THROW)
2351                 cfg->fc_flags |= RTF_REJECT;
2352
2353         if (rtm->rtm_type == RTN_LOCAL)
2354                 cfg->fc_flags |= RTF_LOCAL;
2355
2356         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2357         cfg->fc_nlinfo.nlh = nlh;
2358         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2359
2360         if (tb[RTA_GATEWAY]) {
2361                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2362                 cfg->fc_flags |= RTF_GATEWAY;
2363         }
2364
2365         if (tb[RTA_DST]) {
2366                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2367
2368                 if (nla_len(tb[RTA_DST]) < plen)
2369                         goto errout;
2370
2371                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2372         }
2373
2374         if (tb[RTA_SRC]) {
2375                 int plen = (rtm->rtm_src_len + 7) >> 3;
2376
2377                 if (nla_len(tb[RTA_SRC]) < plen)
2378                         goto errout;
2379
2380                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2381         }
2382
2383         if (tb[RTA_PREFSRC])
2384                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2385
2386         if (tb[RTA_OIF])
2387                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2388
2389         if (tb[RTA_PRIORITY])
2390                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2391
2392         if (tb[RTA_METRICS]) {
2393                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2394                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2395         }
2396
2397         if (tb[RTA_TABLE])
2398                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2399
2400         if (tb[RTA_MULTIPATH]) {
2401                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2402                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2403         }
2404
2405         err = 0;
2406 errout:
2407         return err;
2408 }
2409
2410 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2411 {
2412         struct fib6_config r_cfg;
2413         struct rtnexthop *rtnh;
2414         int remaining;
2415         int attrlen;
2416         int err = 0, last_err = 0;
2417
2418 beginning:
2419         rtnh = (struct rtnexthop *)cfg->fc_mp;
2420         remaining = cfg->fc_mp_len;
2421
2422         /* Parse a Multipath Entry */
2423         while (rtnh_ok(rtnh, remaining)) {
2424                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2425                 if (rtnh->rtnh_ifindex)
2426                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2427
2428                 attrlen = rtnh_attrlen(rtnh);
2429                 if (attrlen > 0) {
2430                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2431
2432                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2433                         if (nla) {
2434                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2435                                 r_cfg.fc_flags |= RTF_GATEWAY;
2436                         }
2437                 }
2438                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2439                 if (err) {
2440                         last_err = err;
2441                         /* If we are trying to remove a route, do not stop the
2442                          * loop when ip6_route_del() fails (because next hop is
2443                          * already gone), we should try to remove all next hops.
2444                          */
2445                         if (add) {
2446                                 /* If add fails, we should try to delete all
2447                                  * next hops that have been already added.
2448                                  */
2449                                 add = 0;
2450                                 goto beginning;
2451                         }
2452                 }
2453                 /* Because each route is added like a single route we remove
2454                  * this flag after the first nexthop (if there is a collision,
2455                  * we have already fail to add the first nexthop:
2456                  * fib6_add_rt2node() has reject it).
2457                  */
2458                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2459                 rtnh = rtnh_next(rtnh, &remaining);
2460         }
2461
2462         return last_err;
2463 }
2464
2465 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2466 {
2467         struct fib6_config cfg;
2468         int err;
2469
2470         err = rtm_to_fib6_config(skb, nlh, &cfg);
2471         if (err < 0)
2472                 return err;
2473
2474         if (cfg.fc_mp)
2475                 return ip6_route_multipath(&cfg, 0);
2476         else
2477                 return ip6_route_del(&cfg);
2478 }
2479
2480 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2481 {
2482         struct fib6_config cfg;
2483         int err;
2484
2485         err = rtm_to_fib6_config(skb, nlh, &cfg);
2486         if (err < 0)
2487                 return err;
2488
2489         if (cfg.fc_mp)
2490                 return ip6_route_multipath(&cfg, 1);
2491         else
2492                 return ip6_route_add(&cfg);
2493 }
2494
2495 static inline size_t rt6_nlmsg_size(void)
2496 {
2497         return NLMSG_ALIGN(sizeof(struct rtmsg))
2498                + nla_total_size(16) /* RTA_SRC */
2499                + nla_total_size(16) /* RTA_DST */
2500                + nla_total_size(16) /* RTA_GATEWAY */
2501                + nla_total_size(16) /* RTA_PREFSRC */
2502                + nla_total_size(4) /* RTA_TABLE */
2503                + nla_total_size(4) /* RTA_IIF */
2504                + nla_total_size(4) /* RTA_OIF */
2505                + nla_total_size(4) /* RTA_PRIORITY */
2506                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2507                + nla_total_size(sizeof(struct rta_cacheinfo));
2508 }
2509
2510 static int rt6_fill_node(struct net *net,
2511                          struct sk_buff *skb, struct rt6_info *rt,
2512                          struct in6_addr *dst, struct in6_addr *src,
2513                          int iif, int type, u32 portid, u32 seq,
2514                          int prefix, int nowait, unsigned int flags)
2515 {
2516         struct rtmsg *rtm;
2517         struct nlmsghdr *nlh;
2518         long expires;
2519         u32 table;
2520
2521         if (prefix) {   /* user wants prefix routes only */
2522                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2523                         /* success since this is not a prefix route */
2524                         return 1;
2525                 }
2526         }
2527
2528         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2529         if (!nlh)
2530                 return -EMSGSIZE;
2531
2532         rtm = nlmsg_data(nlh);
2533         rtm->rtm_family = AF_INET6;
2534         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2535         rtm->rtm_src_len = rt->rt6i_src.plen;
2536         rtm->rtm_tos = 0;
2537         if (rt->rt6i_table)
2538                 table = rt->rt6i_table->tb6_id;
2539         else
2540                 table = RT6_TABLE_UNSPEC;
2541         rtm->rtm_table = table;
2542         if (nla_put_u32(skb, RTA_TABLE, table))
2543                 goto nla_put_failure;
2544         if (rt->rt6i_flags & RTF_REJECT) {
2545                 switch (rt->dst.error) {
2546                 case -EINVAL:
2547                         rtm->rtm_type = RTN_BLACKHOLE;
2548                         break;
2549                 case -EACCES:
2550                         rtm->rtm_type = RTN_PROHIBIT;
2551                         break;
2552                 case -EAGAIN:
2553                         rtm->rtm_type = RTN_THROW;
2554                         break;
2555                 default:
2556                         rtm->rtm_type = RTN_UNREACHABLE;
2557                         break;
2558                 }
2559         }
2560         else if (rt->rt6i_flags & RTF_LOCAL)
2561                 rtm->rtm_type = RTN_LOCAL;
2562         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2563                 rtm->rtm_type = RTN_LOCAL;
2564         else
2565                 rtm->rtm_type = RTN_UNICAST;
2566         rtm->rtm_flags = 0;
2567         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2568         rtm->rtm_protocol = rt->rt6i_protocol;
2569         if (rt->rt6i_flags & RTF_DYNAMIC)
2570                 rtm->rtm_protocol = RTPROT_REDIRECT;
2571         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2572                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2573                         rtm->rtm_protocol = RTPROT_RA;
2574                 else
2575                         rtm->rtm_protocol = RTPROT_KERNEL;
2576         }
2577
2578         if (rt->rt6i_flags & RTF_CACHE)
2579                 rtm->rtm_flags |= RTM_F_CLONED;
2580
2581         if (dst) {
2582                 if (nla_put(skb, RTA_DST, 16, dst))
2583                         goto nla_put_failure;
2584                 rtm->rtm_dst_len = 128;
2585         } else if (rtm->rtm_dst_len)
2586                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2587                         goto nla_put_failure;
2588 #ifdef CONFIG_IPV6_SUBTREES
2589         if (src) {
2590                 if (nla_put(skb, RTA_SRC, 16, src))
2591                         goto nla_put_failure;
2592                 rtm->rtm_src_len = 128;
2593         } else if (rtm->rtm_src_len &&
2594                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2595                 goto nla_put_failure;
2596 #endif
2597         if (iif) {
2598 #ifdef CONFIG_IPV6_MROUTE
2599                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2600                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2601                         if (err <= 0) {
2602                                 if (!nowait) {
2603                                         if (err == 0)
2604                                                 return 0;
2605                                         goto nla_put_failure;
2606                                 } else {
2607                                         if (err == -EMSGSIZE)
2608                                                 goto nla_put_failure;
2609                                 }
2610                         }
2611                 } else
2612 #endif
2613                         if (nla_put_u32(skb, RTA_IIF, iif))
2614                                 goto nla_put_failure;
2615         } else if (dst) {
2616                 struct in6_addr saddr_buf;
2617                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2618                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2619                         goto nla_put_failure;
2620         }
2621
2622         if (rt->rt6i_prefsrc.plen) {
2623                 struct in6_addr saddr_buf;
2624                 saddr_buf = rt->rt6i_prefsrc.addr;
2625                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2626                         goto nla_put_failure;
2627         }
2628
2629         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2630                 goto nla_put_failure;
2631
2632         if (rt->rt6i_flags & RTF_GATEWAY) {
2633                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2634                         goto nla_put_failure;
2635         }
2636
2637         if (rt->dst.dev &&
2638             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2639                 goto nla_put_failure;
2640         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2641                 goto nla_put_failure;
2642
2643         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2644
2645         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2646                 goto nla_put_failure;
2647
2648         return nlmsg_end(skb, nlh);
2649
2650 nla_put_failure:
2651         nlmsg_cancel(skb, nlh);
2652         return -EMSGSIZE;
2653 }
2654
2655 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2656 {
2657         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2658         int prefix;
2659
2660         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2661                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2662                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2663         } else
2664                 prefix = 0;
2665
2666         return rt6_fill_node(arg->net,
2667                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2668                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2669                      prefix, 0, NLM_F_MULTI);
2670 }
2671
2672 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2673 {
2674         struct net *net = sock_net(in_skb->sk);
2675         struct nlattr *tb[RTA_MAX+1];
2676         struct rt6_info *rt;
2677         struct sk_buff *skb;
2678         struct rtmsg *rtm;
2679         struct flowi6 fl6;
2680         int err, iif = 0, oif = 0;
2681
2682         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2683         if (err < 0)
2684                 goto errout;
2685
2686         err = -EINVAL;
2687         memset(&fl6, 0, sizeof(fl6));
2688
2689         if (tb[RTA_SRC]) {
2690                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2691                         goto errout;
2692
2693                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2694         }
2695
2696         if (tb[RTA_DST]) {
2697                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2698                         goto errout;
2699
2700                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2701         }
2702
2703         if (tb[RTA_IIF])
2704                 iif = nla_get_u32(tb[RTA_IIF]);
2705
2706         if (tb[RTA_OIF])
2707                 oif = nla_get_u32(tb[RTA_OIF]);
2708
2709         if (iif) {
2710                 struct net_device *dev;
2711                 int flags = 0;
2712
2713                 dev = __dev_get_by_index(net, iif);
2714                 if (!dev) {
2715                         err = -ENODEV;
2716                         goto errout;
2717                 }
2718
2719                 fl6.flowi6_iif = iif;
2720
2721                 if (!ipv6_addr_any(&fl6.saddr))
2722                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2723
2724                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2725                                                                flags);
2726         } else {
2727                 fl6.flowi6_oif = oif;
2728
2729                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2730         }
2731
2732         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2733         if (!skb) {
2734                 ip6_rt_put(rt);
2735                 err = -ENOBUFS;
2736                 goto errout;
2737         }
2738
2739         /* Reserve room for dummy headers, this skb can pass
2740            through good chunk of routing engine.
2741          */
2742         skb_reset_mac_header(skb);
2743         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2744
2745         skb_dst_set(skb, &rt->dst);
2746
2747         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2748                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2749                             nlh->nlmsg_seq, 0, 0, 0);
2750         if (err < 0) {
2751                 kfree_skb(skb);
2752                 goto errout;
2753         }
2754
2755         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2756 errout:
2757         return err;
2758 }
2759
2760 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2761 {
2762         struct sk_buff *skb;
2763         struct net *net = info->nl_net;
2764         u32 seq;
2765         int err;
2766
2767         err = -ENOBUFS;
2768         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2769
2770         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2771         if (!skb)
2772                 goto errout;
2773
2774         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2775                                 event, info->portid, seq, 0, 0, 0);
2776         if (err < 0) {
2777                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2778                 WARN_ON(err == -EMSGSIZE);
2779                 kfree_skb(skb);
2780                 goto errout;
2781         }
2782         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2783                     info->nlh, gfp_any());
2784         return;
2785 errout:
2786         if (err < 0)
2787                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2788 }
2789
2790 static int ip6_route_dev_notify(struct notifier_block *this,
2791                                 unsigned long event, void *ptr)
2792 {
2793         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2794         struct net *net = dev_net(dev);
2795
2796         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2797                 net->ipv6.ip6_null_entry->dst.dev = dev;
2798                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2799 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2800                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2801                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2802                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2803                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2804 #endif
2805         }
2806
2807         return NOTIFY_OK;
2808 }
2809
2810 /*
2811  *      /proc
2812  */
2813
2814 #ifdef CONFIG_PROC_FS
2815
/*
 * File operations for the "ipv6_route" proc entry created in
 * ip6_route_net_init_late().  Opening is delegated to ipv6_route_open();
 * reading and seeking use the generic seq_file helpers.
 */
static const struct file_operations ipv6_route_proc_fops = {
        .owner          = THIS_MODULE,
        .open           = ipv6_route_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};
2823
2824 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2825 {
2826         struct net *net = (struct net *)seq->private;
2827         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2828                    net->ipv6.rt6_stats->fib_nodes,
2829                    net->ipv6.rt6_stats->fib_route_nodes,
2830                    net->ipv6.rt6_stats->fib_rt_alloc,
2831                    net->ipv6.rt6_stats->fib_rt_entries,
2832                    net->ipv6.rt6_stats->fib_rt_cache,
2833                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2834                    net->ipv6.rt6_stats->fib_discarded_routes);
2835
2836         return 0;
2837 }
2838
/*
 * open() for the "rt6_stats" proc entry: a single-record, per-namespace
 * seq_file whose content is produced by rt6_stats_seq_show().
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
        return single_open_net(inode, file, rt6_stats_seq_show);
}
2843
/*
 * File operations for the "rt6_stats" proc entry created in
 * ip6_route_net_init_late(); release pairs with the single_open_net()
 * done in rt6_stats_seq_open().
 */
static const struct file_operations rt6_stats_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = rt6_stats_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release_net,
};
2851 #endif  /* CONFIG_PROC_FS */
2852
2853 #ifdef CONFIG_SYSCTL
2854
2855 static
2856 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2857                               void __user *buffer, size_t *lenp, loff_t *ppos)
2858 {
2859         struct net *net;
2860         int delay;
2861         if (!write)
2862                 return -EINVAL;
2863
2864         net = (struct net *)ctl->extra1;
2865         delay = net->ipv6.sysctl.flush_delay;
2866         proc_dointvec(ctl, write, buffer, lenp, ppos);
2867         fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2868         return 0;
2869 }
2870
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and re-points each
 * entry's .data at the new namespace BY INDEX (table[0] .. table[9]), so
 * the order of the entries below must stay in sync with that function.
 * The .data values given here refer to init_net (or, for gc_thresh, to
 * the dst ops template) and only serve as placeholders in the copies.
 */
struct ctl_table ipv6_route_table_template[] = {
        {
                /* [0] write-only trigger, see ipv6_sysctl_rtcache_flush() */
                .procname       =       "flush",
                .data           =       &init_net.ipv6.sysctl.flush_delay,
                .maxlen         =       sizeof(int),
                .mode           =       0200,
                .proc_handler   =       ipv6_sysctl_rtcache_flush
        },
        {
                /* [1] */
                .procname       =       "gc_thresh",
                .data           =       &ip6_dst_ops_template.gc_thresh,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [2] */
                .procname       =       "max_size",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [3] same variable as [9], handled by proc_dointvec_jiffies */
                .procname       =       "gc_min_interval",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [4] */
                .procname       =       "gc_timeout",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [5] */
                .procname       =       "gc_interval",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [6] */
                .procname       =       "gc_elasticity",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [7] */
                .procname       =       "mtu_expires",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_jiffies,
        },
        {
                /* [8] */
                .procname       =       "min_adv_mss",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec,
        },
        {
                /* [9] same variable as [3], handled by proc_dointvec_ms_jiffies */
                .procname       =       "gc_min_interval_ms",
                .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
                .maxlen         =       sizeof(int),
                .mode           =       0644,
                .proc_handler   =       proc_dointvec_ms_jiffies,
        },
        { }     /* empty terminating entry */
};
2944
2945 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2946 {
2947         struct ctl_table *table;
2948
2949         table = kmemdup(ipv6_route_table_template,
2950                         sizeof(ipv6_route_table_template),
2951                         GFP_KERNEL);
2952
2953         if (table) {
2954                 table[0].data = &net->ipv6.sysctl.flush_delay;
2955                 table[0].extra1 = net;
2956                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2957                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2958                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2959                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2960                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2961                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2962                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2963                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2964                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2965
2966                 /* Don't export sysctls to unprivileged users */
2967                 if (net->user_ns != &init_user_ns)
2968                         table[0].procname = NULL;
2969         }
2970
2971         return table;
2972 }
2973 #endif
2974
2975 static int __net_init ip6_route_net_init(struct net *net)
2976 {
2977         int ret = -ENOMEM;
2978
2979         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2980                sizeof(net->ipv6.ip6_dst_ops));
2981
2982         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2983                 goto out_ip6_dst_ops;
2984
2985         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2986                                            sizeof(*net->ipv6.ip6_null_entry),
2987                                            GFP_KERNEL);
2988         if (!net->ipv6.ip6_null_entry)
2989                 goto out_ip6_dst_entries;
2990         net->ipv6.ip6_null_entry->dst.path =
2991                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2992         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2993         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2994                          ip6_template_metrics, true);
2995
2996 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2997         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2998                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2999                                                GFP_KERNEL);
3000         if (!net->ipv6.ip6_prohibit_entry)
3001                 goto out_ip6_null_entry;
3002         net->ipv6.ip6_prohibit_entry->dst.path =
3003                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3004         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3005         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3006                          ip6_template_metrics, true);
3007
3008         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3009                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3010                                                GFP_KERNEL);
3011         if (!net->ipv6.ip6_blk_hole_entry)
3012                 goto out_ip6_prohibit_entry;
3013         net->ipv6.ip6_blk_hole_entry->dst.path =
3014                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3015         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3016         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3017                          ip6_template_metrics, true);
3018 #endif
3019
3020         net->ipv6.sysctl.flush_delay = 0;
3021         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3022         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3023         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3024         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3025         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3026         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3027         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3028
3029         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3030
3031         ret = 0;
3032 out:
3033         return ret;
3034
3035 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3036 out_ip6_prohibit_entry:
3037         kfree(net->ipv6.ip6_prohibit_entry);
3038 out_ip6_null_entry:
3039         kfree(net->ipv6.ip6_null_entry);
3040 #endif
3041 out_ip6_dst_entries:
3042         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3043 out_ip6_dst_ops:
3044         goto out;
3045 }
3046
/*
 * Per-namespace teardown: free the special route entries that
 * ip6_route_net_init() duplicated from their templates and destroy the
 * dst entry accounting set up there with dst_entries_init().
 */
static void __net_exit ip6_route_net_exit(struct net *net)
{
        kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
        kfree(net->ipv6.ip6_prohibit_entry);
        kfree(net->ipv6.ip6_blk_hole_entry);
#endif
        dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3056
/*
 * Late per-namespace initialisation: create the "ipv6_route" and
 * "rt6_stats" proc entries when CONFIG_PROC_FS is enabled.
 *
 * Always returns 0; the results of proc_create() are not checked.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
        proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
        return 0;
}
3065
/*
 * Counterpart of ip6_route_net_init_late(): remove the namespace's
 * "ipv6_route" and "rt6_stats" proc entries (CONFIG_PROC_FS only).
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
        remove_proc_entry("ipv6_route", net->proc_net);
        remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3073
/*
 * Per-namespace hooks for the routing core (special route entries and
 * sysctl defaults); registered from ip6_route_init().
 */
static struct pernet_operations ip6_route_net_ops = {
        .init = ip6_route_net_init,
        .exit = ip6_route_net_exit,
};
3078
3079 static int __net_init ipv6_inetpeer_init(struct net *net)
3080 {
3081         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3082
3083         if (!bp)
3084                 return -ENOMEM;
3085         inet_peer_base_init(bp);
3086         net->ipv6.peers = bp;
3087         return 0;
3088 }
3089
3090 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3091 {
3092         struct inet_peer_base *bp = net->ipv6.peers;
3093
3094         net->ipv6.peers = NULL;
3095         inetpeer_invalidate_tree(bp);
3096         kfree(bp);
3097 }
3098
/*
 * Per-namespace hooks managing net->ipv6.peers; registered from
 * ip6_route_init() ahead of ip6_route_net_ops.
 */
static struct pernet_operations ipv6_inetpeer_ops = {
        .init   =       ipv6_inetpeer_init,
        .exit   =       ipv6_inetpeer_exit,
};
3103
/*
 * Per-namespace hooks that create and remove the route proc entries;
 * registered from ip6_route_init() after fib6, xfrm6 and fib6 rules have
 * been initialised.
 */
static struct pernet_operations ip6_route_net_late_ops = {
        .init = ip6_route_net_init_late,
        .exit = ip6_route_net_exit_late,
};
3108
/*
 * Netdevice notifier block that routes device events to
 * ip6_route_dev_notify(); registered as the last step of ip6_route_init().
 */
static struct notifier_block ip6_route_dev_notifier = {
        .notifier_call = ip6_route_dev_notify,
        .priority = 0,
};
3113
3114 int __init ip6_route_init(void)
3115 {
3116         int ret;
3117
3118         ret = -ENOMEM;
3119         ip6_dst_ops_template.kmem_cachep =
3120                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3121                                   SLAB_HWCACHE_ALIGN, NULL);
3122         if (!ip6_dst_ops_template.kmem_cachep)
3123                 goto out;
3124
3125         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3126         if (ret)
3127                 goto out_kmem_cache;
3128
3129         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3130         if (ret)
3131                 goto out_dst_entries;
3132
3133         ret = register_pernet_subsys(&ip6_route_net_ops);
3134         if (ret)
3135                 goto out_register_inetpeer;
3136
3137         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3138
3139         /* Registering of the loopback is done before this portion of code,
3140          * the loopback reference in rt6_info will not be taken, do it
3141          * manually for init_net */
3142         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3143         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3144   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3145         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3146         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3147         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3148         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3149   #endif
3150         ret = fib6_init();
3151         if (ret)
3152                 goto out_register_subsys;
3153
3154         ret = xfrm6_init();
3155         if (ret)
3156                 goto out_fib6_init;
3157
3158         ret = fib6_rules_init();
3159         if (ret)
3160                 goto xfrm6_init;
3161
3162         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3163         if (ret)
3164                 goto fib6_rules_init;
3165
3166         ret = -ENOBUFS;
3167         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3168             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3169             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3170                 goto out_register_late_subsys;
3171
3172         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3173         if (ret)
3174                 goto out_register_late_subsys;
3175
3176 out:
3177         return ret;
3178
3179 out_register_late_subsys:
3180         unregister_pernet_subsys(&ip6_route_net_late_ops);
3181 fib6_rules_init:
3182         fib6_rules_cleanup();
3183 xfrm6_init:
3184         xfrm6_fini();
3185 out_fib6_init:
3186         fib6_gc_cleanup();
3187 out_register_subsys:
3188         unregister_pernet_subsys(&ip6_route_net_ops);
3189 out_register_inetpeer:
3190         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3191 out_dst_entries:
3192         dst_entries_destroy(&ip6_dst_blackhole_ops);
3193 out_kmem_cache:
3194         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3195         goto out;
3196 }
3197
/*
 * Undo ip6_route_init(): unregister the netdevice notifier and the
 * per-namespace operations, shut down FIB rules, xfrm6 and the FIB, and
 * release the blackhole dst accounting and the rt6_info slab cache.
 *
 * NOTE(review): ipv6_inetpeer_ops is unregistered before
 * ip6_route_net_ops here, whereas the unwind path of ip6_route_init()
 * removes them in the opposite order - confirm this is intentional.
 */
void ip6_route_cleanup(void)
{
        unregister_netdevice_notifier(&ip6_route_dev_notifier);
        unregister_pernet_subsys(&ip6_route_net_late_ops);
        fib6_rules_cleanup();
        xfrm6_fini();
        fib6_gc_cleanup();
        unregister_pernet_subsys(&ipv6_inetpeer_ops);
        unregister_pernet_subsys(&ip6_route_net_ops);
        dst_entries_destroy(&ip6_dst_blackhole_ops);
        kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}